Merge branch 'xfs-4.9-reflink-prep' into for-next
This commit is contained in:
commit
79ad576124
20 changed files with 719 additions and 116 deletions
|
@ -52,6 +52,7 @@ xfs-y += $(addprefix libxfs/, \
|
|||
xfs_inode_fork.o \
|
||||
xfs_inode_buf.o \
|
||||
xfs_log_rlimit.o \
|
||||
xfs_ag_resv.o \
|
||||
xfs_rmap.o \
|
||||
xfs_rmap_btree.o \
|
||||
xfs_sb.o \
|
||||
|
|
325
fs/xfs/libxfs/xfs_ag_resv.c
Normal file
325
fs/xfs/libxfs/xfs_ag_resv.c
Normal file
|
@ -0,0 +1,325 @@
|
|||
/*
|
||||
* Copyright (C) 2016 Oracle. All Rights Reserved.
|
||||
*
|
||||
* Author: Darrick J. Wong <darrick.wong@oracle.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_shared.h"
|
||||
#include "xfs_format.h"
|
||||
#include "xfs_log_format.h"
|
||||
#include "xfs_trans_resv.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_mount.h"
|
||||
#include "xfs_defer.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_cksum.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_bit.h"
|
||||
#include "xfs_bmap.h"
|
||||
#include "xfs_bmap_btree.h"
|
||||
#include "xfs_ag_resv.h"
|
||||
#include "xfs_trans_space.h"
|
||||
#include "xfs_rmap_btree.h"
|
||||
#include "xfs_btree.h"
|
||||
|
||||
/*
|
||||
* Per-AG Block Reservations
|
||||
*
|
||||
* For some kinds of allocation group metadata structures, it is advantageous
|
||||
* to reserve a small number of blocks in each AG so that future expansions of
|
||||
* that data structure do not encounter ENOSPC because errors during a btree
|
||||
* split cause the filesystem to go offline.
|
||||
*
|
||||
* Prior to the introduction of reflink, this wasn't an issue because the free
|
||||
* space btrees maintain a reserve of space (the AGFL) to handle any expansion
|
||||
* that may be necessary; and allocations of other metadata (inodes, BMBT,
|
||||
* dir/attr) aren't restricted to a single AG. However, with reflink it is
|
||||
* possible to allocate all the space in an AG, have subsequent reflink/CoW
|
||||
* activity expand the refcount btree, and discover that there's no space left
|
||||
* to handle that expansion. Since we can calculate the maximum size of the
|
||||
* refcount btree, we can reserve space for it and avoid ENOSPC.
|
||||
*
|
||||
* Handling per-AG reservations consists of four changes to the allocator's
|
||||
* behavior: First, because these reservations are always needed, we decrease
|
||||
* the ag_max_usable counter to reflect the size of the AG after the reserved
|
||||
* blocks are taken. Second, the reservations must be reflected in the
|
||||
* fdblocks count to maintain proper accounting. Third, each AG must maintain
|
||||
* its own reserved block counter so that we can calculate the amount of space
|
||||
* that must remain free to maintain the reservations. Fourth, the "remaining
|
||||
* reserved blocks" count must be used when calculating the length of the
|
||||
* longest free extent in an AG and to clamp maxlen in the per-AG allocation
|
||||
* functions. In other words, we maintain a virtual allocation via in-core
|
||||
* accounting tricks so that we don't have to clean up after a crash. :)
|
||||
*
|
||||
* Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
|
||||
* values via struct xfs_alloc_arg or directly to the xfs_free_extent
|
||||
* function. It might seem a little funny to maintain a reservoir of blocks
|
||||
* to feed another reservoir, but the AGFL only holds enough blocks to get
|
||||
* through the next transaction. The per-AG reservation is to ensure (we
|
||||
* hope) that each AG never runs out of blocks. Each data structure wanting
|
||||
* to use the reservation system should update ask/used in xfs_ag_resv_init.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Are we critically low on blocks? For now we'll define that as the number
|
||||
* of blocks we can get our hands on being less than 10% of what we reserved
|
||||
* or less than some arbitrary number (maximum btree height).
|
||||
*/
|
||||
bool
|
||||
xfs_ag_resv_critical(
|
||||
struct xfs_perag *pag,
|
||||
enum xfs_ag_resv_type type)
|
||||
{
|
||||
xfs_extlen_t avail;
|
||||
xfs_extlen_t orig;
|
||||
|
||||
switch (type) {
|
||||
case XFS_AG_RESV_METADATA:
|
||||
avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved;
|
||||
orig = pag->pag_meta_resv.ar_asked;
|
||||
break;
|
||||
case XFS_AG_RESV_AGFL:
|
||||
avail = pag->pagf_freeblks + pag->pagf_flcount -
|
||||
pag->pag_meta_resv.ar_reserved;
|
||||
orig = pag->pag_agfl_resv.ar_asked;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
trace_xfs_ag_resv_critical(pag, type, avail);
|
||||
|
||||
/* Critically low if less than 10% or max btree height remains. */
|
||||
return avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS;
|
||||
}
|
||||
|
||||
/*
|
||||
* How many blocks are reserved but not used, and therefore must not be
|
||||
* allocated away?
|
||||
*/
|
||||
xfs_extlen_t
|
||||
xfs_ag_resv_needed(
|
||||
struct xfs_perag *pag,
|
||||
enum xfs_ag_resv_type type)
|
||||
{
|
||||
xfs_extlen_t len;
|
||||
|
||||
len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved;
|
||||
switch (type) {
|
||||
case XFS_AG_RESV_METADATA:
|
||||
case XFS_AG_RESV_AGFL:
|
||||
len -= xfs_perag_resv(pag, type)->ar_reserved;
|
||||
break;
|
||||
case XFS_AG_RESV_NONE:
|
||||
/* empty */
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
trace_xfs_ag_resv_needed(pag, type, len);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/* Clean out a reservation */
|
||||
static int
|
||||
__xfs_ag_resv_free(
|
||||
struct xfs_perag *pag,
|
||||
enum xfs_ag_resv_type type)
|
||||
{
|
||||
struct xfs_ag_resv *resv;
|
||||
xfs_extlen_t oldresv;
|
||||
int error;
|
||||
|
||||
trace_xfs_ag_resv_free(pag, type, 0);
|
||||
|
||||
resv = xfs_perag_resv(pag, type);
|
||||
pag->pag_mount->m_ag_max_usable += resv->ar_asked;
|
||||
/*
|
||||
* AGFL blocks are always considered "free", so whatever
|
||||
* was reserved at mount time must be given back at umount.
|
||||
*/
|
||||
if (type == XFS_AG_RESV_AGFL)
|
||||
oldresv = resv->ar_orig_reserved;
|
||||
else
|
||||
oldresv = resv->ar_reserved;
|
||||
error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
|
||||
resv->ar_reserved = 0;
|
||||
resv->ar_asked = 0;
|
||||
|
||||
if (error)
|
||||
trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
|
||||
error, _RET_IP_);
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Free a per-AG reservation. */
|
||||
int
|
||||
xfs_ag_resv_free(
|
||||
struct xfs_perag *pag)
|
||||
{
|
||||
int error;
|
||||
int err2;
|
||||
|
||||
error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL);
|
||||
err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
|
||||
if (err2 && !error)
|
||||
error = err2;
|
||||
return error;
|
||||
}
|
||||
|
||||
static int
|
||||
__xfs_ag_resv_init(
|
||||
struct xfs_perag *pag,
|
||||
enum xfs_ag_resv_type type,
|
||||
xfs_extlen_t ask,
|
||||
xfs_extlen_t used)
|
||||
{
|
||||
struct xfs_mount *mp = pag->pag_mount;
|
||||
struct xfs_ag_resv *resv;
|
||||
int error;
|
||||
|
||||
resv = xfs_perag_resv(pag, type);
|
||||
if (used > ask)
|
||||
ask = used;
|
||||
resv->ar_asked = ask;
|
||||
resv->ar_reserved = resv->ar_orig_reserved = ask - used;
|
||||
mp->m_ag_max_usable -= ask;
|
||||
|
||||
trace_xfs_ag_resv_init(pag, type, ask);
|
||||
|
||||
error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true);
|
||||
if (error)
|
||||
trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
|
||||
error, _RET_IP_);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Create a per-AG block reservation. */
|
||||
int
|
||||
xfs_ag_resv_init(
|
||||
struct xfs_perag *pag)
|
||||
{
|
||||
xfs_extlen_t ask;
|
||||
xfs_extlen_t used;
|
||||
int error = 0;
|
||||
|
||||
/* Create the metadata reservation. */
|
||||
if (pag->pag_meta_resv.ar_asked == 0) {
|
||||
ask = used = 0;
|
||||
|
||||
error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
|
||||
ask, used);
|
||||
if (error)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Create the AGFL metadata reservation */
|
||||
if (pag->pag_agfl_resv.ar_asked == 0) {
|
||||
ask = used = 0;
|
||||
|
||||
error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used);
|
||||
if (error)
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Allocate a block from the reservation. */
|
||||
void
|
||||
xfs_ag_resv_alloc_extent(
|
||||
struct xfs_perag *pag,
|
||||
enum xfs_ag_resv_type type,
|
||||
struct xfs_alloc_arg *args)
|
||||
{
|
||||
struct xfs_ag_resv *resv;
|
||||
xfs_extlen_t len;
|
||||
uint field;
|
||||
|
||||
trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
|
||||
|
||||
switch (type) {
|
||||
case XFS_AG_RESV_METADATA:
|
||||
case XFS_AG_RESV_AGFL:
|
||||
resv = xfs_perag_resv(pag, type);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
/* fall through */
|
||||
case XFS_AG_RESV_NONE:
|
||||
field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
|
||||
XFS_TRANS_SB_FDBLOCKS;
|
||||
xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
|
||||
return;
|
||||
}
|
||||
|
||||
len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
|
||||
resv->ar_reserved -= len;
|
||||
if (type == XFS_AG_RESV_AGFL)
|
||||
return;
|
||||
/* Allocations of reserved blocks only need on-disk sb updates... */
|
||||
xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
|
||||
/* ...but non-reserved blocks need in-core and on-disk updates. */
|
||||
if (args->len > len)
|
||||
xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
|
||||
-((int64_t)args->len - len));
|
||||
}
|
||||
|
||||
/* Free a block to the reservation. */
|
||||
void
|
||||
xfs_ag_resv_free_extent(
|
||||
struct xfs_perag *pag,
|
||||
enum xfs_ag_resv_type type,
|
||||
struct xfs_trans *tp,
|
||||
xfs_extlen_t len)
|
||||
{
|
||||
xfs_extlen_t leftover;
|
||||
struct xfs_ag_resv *resv;
|
||||
|
||||
trace_xfs_ag_resv_free_extent(pag, type, len);
|
||||
|
||||
switch (type) {
|
||||
case XFS_AG_RESV_METADATA:
|
||||
case XFS_AG_RESV_AGFL:
|
||||
resv = xfs_perag_resv(pag, type);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
/* fall through */
|
||||
case XFS_AG_RESV_NONE:
|
||||
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
|
||||
return;
|
||||
}
|
||||
|
||||
leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
|
||||
resv->ar_reserved += leftover;
|
||||
if (type == XFS_AG_RESV_AGFL)
|
||||
return;
|
||||
/* Freeing into the reserved pool only requires on-disk update... */
|
||||
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
|
||||
/* ...but freeing beyond that requires in-core and on-disk update. */
|
||||
if (len > leftover)
|
||||
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
|
||||
}
|
35
fs/xfs/libxfs/xfs_ag_resv.h
Normal file
35
fs/xfs/libxfs/xfs_ag_resv.h
Normal file
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright (C) 2016 Oracle. All Rights Reserved.
|
||||
*
|
||||
* Author: Darrick J. Wong <darrick.wong@oracle.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
/* Per-AG block reservation interface; implemented in xfs_ag_resv.c. */
#ifndef __XFS_AG_RESV_H__
|
||||
#define __XFS_AG_RESV_H__
|
||||
|
||||
/* Free the per-AG reservations (AGFL and metadata); returns 0 or an error. */
int xfs_ag_resv_free(struct xfs_perag *pag);
|
||||
/* Create the per-AG block reservations; returns 0 or an error. */
int xfs_ag_resv_init(struct xfs_perag *pag);
|
||||
|
||||
/* Are we critically low on blocks for this reservation type? */
bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type);
|
||||
/* How many blocks are reserved but not used, and so must not be allocated? */
xfs_extlen_t xfs_ag_resv_needed(struct xfs_perag *pag,
|
||||
enum xfs_ag_resv_type type);
|
||||
|
||||
/* Account for an extent (args->len blocks) allocated from the reservation. */
void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
|
||||
struct xfs_alloc_arg *args);
|
||||
/* Account for len blocks freed back to the reservation. */
void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
|
||||
struct xfs_trans *tp, xfs_extlen_t len);
|
||||
|
||||
#endif /* __XFS_AG_RESV_H__ */
|
|
@ -37,6 +37,7 @@
|
|||
#include "xfs_trans.h"
|
||||
#include "xfs_buf_item.h"
|
||||
#include "xfs_log.h"
|
||||
#include "xfs_ag_resv.h"
|
||||
|
||||
struct workqueue_struct *xfs_alloc_wq;
|
||||
|
||||
|
@ -74,14 +75,8 @@ xfs_prealloc_blocks(
|
|||
* extents need to be actually allocated. To get around this, we explicitly set
|
||||
* aside a few blocks which will not be reserved in delayed allocation.
|
||||
*
|
||||
* When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist
|
||||
* and 4 more to handle a potential split of the file's bmap btree.
|
||||
*
|
||||
* When rmap is enabled, we must also be able to handle two rmap btree inserts
|
||||
* to record both the file data extent and a new bmbt block. The bmbt block
|
||||
* might not be in the same AG as the file data extent. In the worst case
|
||||
* the bmap btree splits multiple levels and all the new blocks come from
|
||||
* different AGs, so set aside enough to handle rmap btree splits in all AGs.
|
||||
* We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
|
||||
* potential split of the file's bmap btree.
|
||||
*/
|
||||
unsigned int
|
||||
xfs_alloc_set_aside(
|
||||
|
@ -90,8 +85,6 @@ xfs_alloc_set_aside(
|
|||
unsigned int blocks;
|
||||
|
||||
blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
|
||||
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
|
||||
blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels;
|
||||
return blocks;
|
||||
}
|
||||
|
||||
|
@ -680,12 +673,29 @@ xfs_alloc_ag_vextent(
|
|||
xfs_alloc_arg_t *args) /* argument structure for allocation */
|
||||
{
|
||||
int error=0;
|
||||
xfs_extlen_t reservation;
|
||||
xfs_extlen_t oldmax;
|
||||
|
||||
ASSERT(args->minlen > 0);
|
||||
ASSERT(args->maxlen > 0);
|
||||
ASSERT(args->minlen <= args->maxlen);
|
||||
ASSERT(args->mod < args->prod);
|
||||
ASSERT(args->alignment > 0);
|
||||
|
||||
/*
|
||||
* Clamp maxlen to the amount of free space minus any reservations
|
||||
* that have been made.
|
||||
*/
|
||||
oldmax = args->maxlen;
|
||||
reservation = xfs_ag_resv_needed(args->pag, args->resv);
|
||||
if (args->maxlen > args->pag->pagf_freeblks - reservation)
|
||||
args->maxlen = args->pag->pagf_freeblks - reservation;
|
||||
if (args->maxlen == 0) {
|
||||
args->agbno = NULLAGBLOCK;
|
||||
args->maxlen = oldmax;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Branch to correct routine based on the type.
|
||||
*/
|
||||
|
@ -705,12 +715,14 @@ xfs_alloc_ag_vextent(
|
|||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
args->maxlen = oldmax;
|
||||
|
||||
if (error || args->agbno == NULLAGBLOCK)
|
||||
return error;
|
||||
|
||||
ASSERT(args->len >= args->minlen);
|
||||
ASSERT(args->len <= args->maxlen);
|
||||
ASSERT(!args->wasfromfl || !args->isfl);
|
||||
ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
|
||||
ASSERT(args->agbno % args->alignment == 0);
|
||||
|
||||
/* if not file data, insert new block into the reverse map btree */
|
||||
|
@ -732,12 +744,7 @@ xfs_alloc_ag_vextent(
|
|||
args->agbno, args->len));
|
||||
}
|
||||
|
||||
if (!args->isfl) {
|
||||
xfs_trans_mod_sb(args->tp, args->wasdel ?
|
||||
XFS_TRANS_SB_RES_FDBLOCKS :
|
||||
XFS_TRANS_SB_FDBLOCKS,
|
||||
-((long)(args->len)));
|
||||
}
|
||||
xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
|
||||
|
||||
XFS_STATS_INC(args->mp, xs_allocx);
|
||||
XFS_STATS_ADD(args->mp, xs_allocb, args->len);
|
||||
|
@ -1583,6 +1590,7 @@ xfs_alloc_ag_vextent_small(
|
|||
int *stat) /* status: 0-freelist, 1-normal/none */
|
||||
{
|
||||
struct xfs_owner_info oinfo;
|
||||
struct xfs_perag *pag;
|
||||
int error;
|
||||
xfs_agblock_t fbno;
|
||||
xfs_extlen_t flen;
|
||||
|
@ -1600,7 +1608,8 @@ xfs_alloc_ag_vextent_small(
|
|||
* to respect minleft even when pulling from the
|
||||
* freelist.
|
||||
*/
|
||||
else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
|
||||
else if (args->minlen == 1 && args->alignment == 1 &&
|
||||
args->resv != XFS_AG_RESV_AGFL &&
|
||||
(be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
|
||||
> args->minleft)) {
|
||||
error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
|
||||
|
@ -1629,13 +1638,18 @@ xfs_alloc_ag_vextent_small(
|
|||
/*
|
||||
* If we're feeding an AGFL block to something that
|
||||
* doesn't live in the free space, we need to clear
|
||||
* out the OWN_AG rmap.
|
||||
* out the OWN_AG rmap and add the block back to
|
||||
* the AGFL per-AG reservation.
|
||||
*/
|
||||
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
|
||||
error = xfs_rmap_free(args->tp, args->agbp, args->agno,
|
||||
fbno, 1, &oinfo);
|
||||
if (error)
|
||||
goto error0;
|
||||
pag = xfs_perag_get(args->mp, args->agno);
|
||||
xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
|
||||
args->tp, 1);
|
||||
xfs_perag_put(pag);
|
||||
|
||||
*stat = 0;
|
||||
return 0;
|
||||
|
@ -1683,7 +1697,7 @@ xfs_free_ag_extent(
|
|||
xfs_agblock_t bno,
|
||||
xfs_extlen_t len,
|
||||
struct xfs_owner_info *oinfo,
|
||||
int isfl)
|
||||
enum xfs_ag_resv_type type)
|
||||
{
|
||||
xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
|
||||
xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
|
||||
|
@ -1911,21 +1925,22 @@ xfs_free_ag_extent(
|
|||
*/
|
||||
pag = xfs_perag_get(mp, agno);
|
||||
error = xfs_alloc_update_counters(tp, pag, agbp, len);
|
||||
xfs_ag_resv_free_extent(pag, type, tp, len);
|
||||
xfs_perag_put(pag);
|
||||
if (error)
|
||||
goto error0;
|
||||
|
||||
if (!isfl)
|
||||
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
|
||||
XFS_STATS_INC(mp, xs_freex);
|
||||
XFS_STATS_ADD(mp, xs_freeb, len);
|
||||
|
||||
trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
|
||||
trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
|
||||
haveleft, haveright);
|
||||
|
||||
return 0;
|
||||
|
||||
error0:
|
||||
trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
|
||||
trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
|
||||
-1, -1);
|
||||
if (bno_cur)
|
||||
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
|
||||
if (cnt_cur)
|
||||
|
@ -1950,21 +1965,43 @@ xfs_alloc_compute_maxlevels(
|
|||
}
|
||||
|
||||
/*
|
||||
* Find the length of the longest extent in an AG.
|
||||
* Find the length of the longest extent in an AG. The 'need' parameter
|
||||
* specifies how much space we're going to need for the AGFL and the
|
||||
* 'reserved' parameter tells us how many blocks in this AG are reserved for
|
||||
* other callers.
|
||||
*/
|
||||
xfs_extlen_t
|
||||
xfs_alloc_longest_free_extent(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_perag *pag,
|
||||
xfs_extlen_t need)
|
||||
xfs_extlen_t need,
|
||||
xfs_extlen_t reserved)
|
||||
{
|
||||
xfs_extlen_t delta = 0;
|
||||
|
||||
/*
|
||||
* If the AGFL needs a recharge, we'll have to subtract that from the
|
||||
* longest extent.
|
||||
*/
|
||||
if (need > pag->pagf_flcount)
|
||||
delta = need - pag->pagf_flcount;
|
||||
|
||||
/*
|
||||
* If we cannot maintain others' reservations with space from the
|
||||
* not-longest freesp extents, we'll have to subtract /that/ from
|
||||
* the longest extent too.
|
||||
*/
|
||||
if (pag->pagf_freeblks - pag->pagf_longest < reserved)
|
||||
delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
|
||||
|
||||
/*
|
||||
* If the longest extent is long enough to satisfy all the
|
||||
* reservations and AGFL rules in place, we can return this extent.
|
||||
*/
|
||||
if (pag->pagf_longest > delta)
|
||||
return pag->pagf_longest - delta;
|
||||
|
||||
/* Otherwise, let the caller try for 1 block if there's space. */
|
||||
return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
|
||||
}
|
||||
|
||||
|
@ -2004,20 +2041,24 @@ xfs_alloc_space_available(
|
|||
{
|
||||
struct xfs_perag *pag = args->pag;
|
||||
xfs_extlen_t longest;
|
||||
xfs_extlen_t reservation; /* blocks that are still reserved */
|
||||
int available;
|
||||
|
||||
if (flags & XFS_ALLOC_FLAG_FREEING)
|
||||
return true;
|
||||
|
||||
reservation = xfs_ag_resv_needed(pag, args->resv);
|
||||
|
||||
/* do we have enough contiguous free space for the allocation? */
|
||||
longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
|
||||
longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
|
||||
reservation);
|
||||
if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
|
||||
return false;
|
||||
|
||||
/* do have enough free space remaining for the allocation? */
|
||||
/* do we have enough free space remaining for the allocation? */
|
||||
available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
|
||||
min_free - args->total);
|
||||
if (available < (int)args->minleft)
|
||||
reservation - min_free - args->total);
|
||||
if (available < (int)args->minleft || available <= 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -2124,7 +2165,7 @@ xfs_alloc_fix_freelist(
|
|||
if (error)
|
||||
goto out_agbp_relse;
|
||||
error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
|
||||
&targs.oinfo, 1);
|
||||
&targs.oinfo, XFS_AG_RESV_AGFL);
|
||||
if (error)
|
||||
goto out_agbp_relse;
|
||||
bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
|
||||
|
@ -2135,7 +2176,7 @@ xfs_alloc_fix_freelist(
|
|||
targs.mp = mp;
|
||||
targs.agbp = agbp;
|
||||
targs.agno = args->agno;
|
||||
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
|
||||
targs.alignment = targs.minlen = targs.prod = 1;
|
||||
targs.type = XFS_ALLOCTYPE_THIS_AG;
|
||||
targs.pag = pag;
|
||||
error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
|
||||
|
@ -2146,6 +2187,7 @@ xfs_alloc_fix_freelist(
|
|||
while (pag->pagf_flcount < need) {
|
||||
targs.agbno = 0;
|
||||
targs.maxlen = need - pag->pagf_flcount;
|
||||
targs.resv = XFS_AG_RESV_AGFL;
|
||||
|
||||
/* Allocate as many blocks as possible at once. */
|
||||
error = xfs_alloc_ag_vextent(&targs);
|
||||
|
@ -2825,7 +2867,8 @@ xfs_free_extent(
|
|||
struct xfs_trans *tp, /* transaction pointer */
|
||||
xfs_fsblock_t bno, /* starting block number of extent */
|
||||
xfs_extlen_t len, /* length of extent */
|
||||
struct xfs_owner_info *oinfo) /* extent owner */
|
||||
struct xfs_owner_info *oinfo, /* extent owner */
|
||||
enum xfs_ag_resv_type type) /* block reservation type */
|
||||
{
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
struct xfs_buf *agbp;
|
||||
|
@ -2834,6 +2877,7 @@ xfs_free_extent(
|
|||
int error;
|
||||
|
||||
ASSERT(len != 0);
|
||||
ASSERT(type != XFS_AG_RESV_AGFL);
|
||||
|
||||
if (XFS_TEST_ERROR(false, mp,
|
||||
XFS_ERRTAG_FREE_EXTENT,
|
||||
|
@ -2851,7 +2895,7 @@ xfs_free_extent(
|
|||
agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
|
||||
err);
|
||||
|
||||
error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0);
|
||||
error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
|
||||
if (error)
|
||||
goto err;
|
||||
|
||||
|
|
|
@ -87,10 +87,10 @@ typedef struct xfs_alloc_arg {
|
|||
xfs_alloctype_t otype; /* original allocation type */
|
||||
char wasdel; /* set if allocation was prev delayed */
|
||||
char wasfromfl; /* set if allocation is from freelist */
|
||||
char isfl; /* set if is freelist blocks - !acctg */
|
||||
char userdata; /* mask defining userdata treatment */
|
||||
xfs_fsblock_t firstblock; /* io first block allocated */
|
||||
struct xfs_owner_info oinfo; /* owner of blocks being allocated */
|
||||
enum xfs_ag_resv_type resv; /* block reservation to use */
|
||||
} xfs_alloc_arg_t;
|
||||
|
||||
/*
|
||||
|
@ -106,7 +106,8 @@ unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
|
|||
unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
|
||||
|
||||
xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
|
||||
struct xfs_perag *pag, xfs_extlen_t need);
|
||||
struct xfs_perag *pag, xfs_extlen_t need,
|
||||
xfs_extlen_t reserved);
|
||||
unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
|
||||
struct xfs_perag *pag);
|
||||
|
||||
|
@ -184,7 +185,8 @@ xfs_free_extent(
|
|||
struct xfs_trans *tp, /* transaction pointer */
|
||||
xfs_fsblock_t bno, /* starting block number of extent */
|
||||
xfs_extlen_t len, /* length of extent */
|
||||
struct xfs_owner_info *oinfo);/* extent owner */
|
||||
struct xfs_owner_info *oinfo, /* extent owner */
|
||||
enum xfs_ag_resv_type type); /* block reservation type */
|
||||
|
||||
int /* error */
|
||||
xfs_alloc_lookup_ge(
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
#include "xfs_attr_leaf.h"
|
||||
#include "xfs_filestream.h"
|
||||
#include "xfs_rmap.h"
|
||||
#include "xfs_ag_resv.h"
|
||||
|
||||
|
||||
kmem_zone_t *xfs_bmap_free_item_zone;
|
||||
|
@ -3501,7 +3502,8 @@ xfs_bmap_longest_free_extent(
|
|||
}
|
||||
|
||||
longest = xfs_alloc_longest_free_extent(mp, pag,
|
||||
xfs_alloc_min_freelist(mp, pag));
|
||||
xfs_alloc_min_freelist(mp, pag),
|
||||
xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
|
||||
if (*blen < longest)
|
||||
*blen = longest;
|
||||
|
||||
|
@ -3781,7 +3783,7 @@ xfs_bmap_btalloc(
|
|||
}
|
||||
args.minleft = ap->minleft;
|
||||
args.wasdel = ap->wasdel;
|
||||
args.isfl = 0;
|
||||
args.resv = XFS_AG_RESV_NONE;
|
||||
args.userdata = ap->userdata;
|
||||
if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
|
||||
args.ip = ap->ip;
|
||||
|
|
|
@ -2070,7 +2070,7 @@ __xfs_btree_updkeys(
|
|||
struct xfs_buf *bp0,
|
||||
bool force_all)
|
||||
{
|
||||
union xfs_btree_bigkey key; /* keys from current level */
|
||||
union xfs_btree_key key; /* keys from current level */
|
||||
union xfs_btree_key *lkey; /* keys from the next level up */
|
||||
union xfs_btree_key *hkey;
|
||||
union xfs_btree_key *nlkey; /* keys from the next level up */
|
||||
|
@ -2086,7 +2086,7 @@ __xfs_btree_updkeys(
|
|||
|
||||
trace_xfs_btree_updkeys(cur, level, bp0);
|
||||
|
||||
lkey = (union xfs_btree_key *)&key;
|
||||
lkey = &key;
|
||||
hkey = xfs_btree_high_key_from_key(cur, lkey);
|
||||
xfs_btree_get_keys(cur, block, lkey);
|
||||
for (level++; level < cur->bc_nlevels; level++) {
|
||||
|
@ -3226,7 +3226,7 @@ xfs_btree_insrec(
|
|||
struct xfs_buf *bp; /* buffer for block */
|
||||
union xfs_btree_ptr nptr; /* new block ptr */
|
||||
struct xfs_btree_cur *ncur; /* new btree cursor */
|
||||
union xfs_btree_bigkey nkey; /* new block key */
|
||||
union xfs_btree_key nkey; /* new block key */
|
||||
union xfs_btree_key *lkey;
|
||||
int optr; /* old key/record index */
|
||||
int ptr; /* key/record index */
|
||||
|
@ -3241,7 +3241,7 @@ xfs_btree_insrec(
|
|||
XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
|
||||
|
||||
ncur = NULL;
|
||||
lkey = (union xfs_btree_key *)&nkey;
|
||||
lkey = &nkey;
|
||||
|
||||
/*
|
||||
* If we have an external root pointer, and we've made it to the
|
||||
|
@ -3444,14 +3444,14 @@ xfs_btree_insert(
|
|||
union xfs_btree_ptr nptr; /* new block number (split result) */
|
||||
struct xfs_btree_cur *ncur; /* new cursor (split result) */
|
||||
struct xfs_btree_cur *pcur; /* previous level's cursor */
|
||||
union xfs_btree_bigkey bkey; /* key of block to insert */
|
||||
union xfs_btree_key bkey; /* key of block to insert */
|
||||
union xfs_btree_key *key;
|
||||
union xfs_btree_rec rec; /* record to insert */
|
||||
|
||||
level = 0;
|
||||
ncur = NULL;
|
||||
pcur = cur;
|
||||
key = (union xfs_btree_key *)&bkey;
|
||||
key = &bkey;
|
||||
|
||||
xfs_btree_set_ptr_null(cur, &nptr);
|
||||
|
||||
|
@ -4797,3 +4797,50 @@ xfs_btree_query_range(
|
|||
return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
|
||||
fn, priv);
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the number of blocks needed to store a given number of records
|
||||
* in a short-format (per-AG metadata) btree.
|
||||
*/
|
||||
xfs_extlen_t
|
||||
xfs_btree_calc_size(
|
||||
struct xfs_mount *mp,
|
||||
uint *limits,
|
||||
unsigned long long len)
|
||||
{
|
||||
int level;
|
||||
int maxrecs;
|
||||
xfs_extlen_t rval;
|
||||
|
||||
maxrecs = limits[0];
|
||||
for (level = 0, rval = 0; len > 1; level++) {
|
||||
len += maxrecs - 1;
|
||||
do_div(len, maxrecs);
|
||||
maxrecs = limits[1];
|
||||
rval += len;
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_btree_count_blocks_helper(
|
||||
struct xfs_btree_cur *cur,
|
||||
int level,
|
||||
void *data)
|
||||
{
|
||||
xfs_extlen_t *blocks = data;
|
||||
(*blocks)++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Count the blocks in a btree and return the result in *blocks. */
|
||||
int
|
||||
xfs_btree_count_blocks(
|
||||
struct xfs_btree_cur *cur,
|
||||
xfs_extlen_t *blocks)
|
||||
{
|
||||
*blocks = 0;
|
||||
return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
|
||||
blocks);
|
||||
}
|
||||
|
|
|
@ -37,30 +37,18 @@ union xfs_btree_ptr {
|
|||
__be64 l; /* long form ptr */
|
||||
};
|
||||
|
||||
/*
|
||||
* The in-core btree key. Overlapping btrees actually store two keys
|
||||
* per pointer, so we reserve enough memory to hold both. The __*bigkey
|
||||
* items should never be accessed directly.
|
||||
*/
|
||||
union xfs_btree_key {
|
||||
struct xfs_bmbt_key bmbt;
|
||||
xfs_bmdr_key_t bmbr; /* bmbt root block */
|
||||
xfs_alloc_key_t alloc;
|
||||
struct xfs_inobt_key inobt;
|
||||
struct xfs_rmap_key rmap;
|
||||
};
|
||||
|
||||
/*
|
||||
* In-core key that holds both low and high keys for overlapped btrees.
|
||||
* The two keys are packed next to each other on disk, so do the same
|
||||
* in memory. Preserve the existing xfs_btree_key as a single key to
|
||||
* avoid the mental model breakage that would happen if we passed a
|
||||
* bigkey into a function that operates on a single key.
|
||||
*/
|
||||
union xfs_btree_bigkey {
|
||||
struct xfs_bmbt_key bmbt;
|
||||
xfs_bmdr_key_t bmbr; /* bmbt root block */
|
||||
xfs_alloc_key_t alloc;
|
||||
struct xfs_inobt_key inobt;
|
||||
struct {
|
||||
struct xfs_rmap_key rmap;
|
||||
struct xfs_rmap_key rmap_hi;
|
||||
};
|
||||
struct xfs_rmap_key __rmap_bigkey[2];
|
||||
};
|
||||
|
||||
union xfs_btree_rec {
|
||||
|
@ -513,6 +501,8 @@ bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
|
|||
bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
|
||||
uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
|
||||
unsigned long len);
|
||||
xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits,
|
||||
unsigned long long len);
|
||||
|
||||
/* return codes */
|
||||
#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */
|
||||
|
@ -529,4 +519,6 @@ typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
|
|||
int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
|
||||
xfs_btree_visit_blocks_fn fn, void *data);
|
||||
|
||||
int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
|
||||
|
||||
#endif /* __XFS_BTREE_H__ */
|
||||
|
|
|
@ -81,6 +81,10 @@
|
|||
* - For each work item attached to the log intent item,
|
||||
* * Perform the described action.
|
||||
* * Attach the work item to the log done item.
|
||||
* If the result of doing the work was -EAGAIN, ->finish_item
|
||||
* wants a new transaction. See the "Requesting a Fresh
|
||||
* Transaction while Finishing Deferred Work" section below for
|
||||
* details.
|
||||
*
|
||||
* The key here is that we must log an intent item for all pending
|
||||
* work items every time we roll the transaction, and that we must log
|
||||
|
@ -88,6 +92,34 @@
|
|||
* we can perform complex remapping operations, chaining intent items
|
||||
* as needed.
|
||||
*
|
||||
* Requesting a Fresh Transaction while Finishing Deferred Work
|
||||
*
|
||||
* If ->finish_item decides that it needs a fresh transaction to
|
||||
* finish the work, it must ask its caller (xfs_defer_finish) for a
|
||||
* continuation. The most likely cause of this circumstance is the
|
||||
* refcount adjust functions deciding that they've logged enough items
|
||||
* to be at risk of exceeding the transaction reservation.
|
||||
*
|
||||
* To get a fresh transaction, we want to log the existing log done
|
||||
* item to prevent the log intent item from replaying, immediately log
|
||||
* a new log intent item with the unfinished work items, roll the
|
||||
* transaction, and re-call ->finish_item wherever it left off. The
|
||||
* log done item and the new log intent item must be in the same
|
||||
* transaction or atomicity cannot be guaranteed; defer_finish ensures
|
||||
* that this happens.
|
||||
*
|
||||
* This requires some coordination between ->finish_item and
|
||||
* defer_finish. Upon deciding to request a new transaction,
|
||||
* ->finish_item should update the current work item to reflect the
|
||||
* unfinished work. Next, it should reset the log done item's list
|
||||
* count to the number of items finished, and return -EAGAIN.
|
||||
* defer_finish sees the -EAGAIN, logs the new log intent item
|
||||
* with the remaining work items, and leaves the xfs_defer_pending
|
||||
* item at the head of the dop_work queue. Then it rolls the
|
||||
* transaction and picks up processing where it left off. It is
|
||||
* required that ->finish_item must be careful to leave enough
|
||||
* transaction reservation to fit the new log intent item.
|
||||
*
|
||||
* This is an example of remapping the extent (E, E+B) into file X at
|
||||
* offset A and dealing with the extent (C, C+B) already being mapped
|
||||
* there:
|
||||
|
@ -104,21 +136,26 @@
|
|||
* | Intent to add rmap (X, E, A, B) |
|
||||
* +-------------------------------------------------+
|
||||
* | Reduce refcount for extent (C, B) | t2
|
||||
* | Done reducing refcount for extent (C, B) |
|
||||
* | Done reducing refcount for extent (C, 9) |
|
||||
* | Intent to reduce refcount for extent (C+9, B-9) |
|
||||
* | (ran out of space after 9 refcount updates) |
|
||||
* +-------------------------------------------------+
|
||||
* | Reduce refcount for extent (C+9, B-9) | t3
|
||||
* | Done reducing refcount for extent (C+9, B-9) |
|
||||
* | Increase refcount for extent (E, B) |
|
||||
* | Done increasing refcount for extent (E, B) |
|
||||
* | Intent to free extent (C, B) |
|
||||
* | Intent to free extent (F, 1) (refcountbt block) |
|
||||
* | Intent to remove rmap (F, 1, REFC) |
|
||||
* +-------------------------------------------------+
|
||||
* | Remove rmap (X, C, A, B) | t3
|
||||
* | Remove rmap (X, C, A, B) | t4
|
||||
* | Done removing rmap (X, C, A, B) |
|
||||
* | Add rmap (X, E, A, B) |
|
||||
* | Done adding rmap (X, E, A, B) |
|
||||
* | Remove rmap (F, 1, REFC) |
|
||||
* | Done removing rmap (F, 1, REFC) |
|
||||
* +-------------------------------------------------+
|
||||
* | Free extent (C, B) | t4
|
||||
* | Free extent (C, B) | t5
|
||||
* | Done freeing extent (C, B) |
|
||||
* | Free extent (D, 1) |
|
||||
* | Done freeing extent (D, 1) |
|
||||
|
@ -141,6 +178,9 @@
|
|||
* - Intent to free extent (C, B)
|
||||
* - Intent to free extent (F, 1) (refcountbt block)
|
||||
* - Intent to remove rmap (F, 1, REFC)
|
||||
*
|
||||
* Note that the continuation requested between t2 and t3 is likely to
|
||||
* reoccur.
|
||||
*/
|
||||
|
||||
static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
|
||||
|
@ -323,7 +363,16 @@ xfs_defer_finish(
|
|||
dfp->dfp_count--;
|
||||
error = dfp->dfp_type->finish_item(*tp, dop, li,
|
||||
dfp->dfp_done, &state);
|
||||
if (error) {
|
||||
if (error == -EAGAIN) {
|
||||
/*
|
||||
* Caller wants a fresh transaction;
|
||||
* put the work item back on the list
|
||||
* and jump out.
|
||||
*/
|
||||
list_add(li, &dfp->dfp_work);
|
||||
dfp->dfp_count++;
|
||||
break;
|
||||
} else if (error) {
|
||||
/*
|
||||
* Clean up after ourselves and jump out.
|
||||
* xfs_defer_cancel will take care of freeing
|
||||
|
@ -335,9 +384,25 @@ xfs_defer_finish(
|
|||
goto out;
|
||||
}
|
||||
}
|
||||
/* Done with the dfp, free it. */
|
||||
list_del(&dfp->dfp_list);
|
||||
kmem_free(dfp);
|
||||
if (error == -EAGAIN) {
|
||||
/*
|
||||
* Caller wants a fresh transaction, so log a
|
||||
* new log intent item to replace the old one
|
||||
* and roll the transaction. See "Requesting
|
||||
* a Fresh Transaction while Finishing
|
||||
* Deferred Work" above.
|
||||
*/
|
||||
dfp->dfp_intent = dfp->dfp_type->create_intent(*tp,
|
||||
dfp->dfp_count);
|
||||
dfp->dfp_done = NULL;
|
||||
list_for_each(li, &dfp->dfp_work)
|
||||
dfp->dfp_type->log_item(*tp, dfp->dfp_intent,
|
||||
li);
|
||||
} else {
|
||||
/* Done with the dfp, free it. */
|
||||
list_del(&dfp->dfp_list);
|
||||
kmem_free(dfp);
|
||||
}
|
||||
|
||||
if (cleanup_fn)
|
||||
cleanup_fn(*tp, state, error);
|
||||
|
|
|
@ -132,7 +132,7 @@ xfs_inobt_free_block(
|
|||
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
|
||||
return xfs_free_extent(cur->bc_tp,
|
||||
XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
|
||||
&oinfo);
|
||||
&oinfo, XFS_AG_RESV_NONE);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
|
|
|
@ -647,9 +647,17 @@ struct xfs_rui_log_format {
|
|||
__uint16_t rui_size; /* size of this item */
|
||||
__uint32_t rui_nextents; /* # extents to free */
|
||||
__uint64_t rui_id; /* rui identifier */
|
||||
struct xfs_map_extent rui_extents[1]; /* array of extents to rmap */
|
||||
struct xfs_map_extent rui_extents[]; /* array of extents to rmap */
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
xfs_rui_log_format_sizeof(
|
||||
unsigned int nr)
|
||||
{
|
||||
return sizeof(struct xfs_rui_log_format) +
|
||||
nr * sizeof(struct xfs_map_extent);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the structure used to lay out an rud log item in the
|
||||
* log. The rud_extents array is a variable size array whose
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "xfs_mru_cache.h"
|
||||
#include "xfs_filestream.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_ag_resv.h"
|
||||
|
||||
struct xfs_fstrm_item {
|
||||
struct xfs_mru_cache_elem mru;
|
||||
|
@ -198,7 +199,8 @@ xfs_filestream_pick_ag(
|
|||
}
|
||||
|
||||
longest = xfs_alloc_longest_free_extent(mp, pag,
|
||||
xfs_alloc_min_freelist(mp, pag));
|
||||
xfs_alloc_min_freelist(mp, pag),
|
||||
xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
|
||||
if (((minlen && longest >= minlen) ||
|
||||
(!minlen && pag->pagf_freeblks >= minfree)) &&
|
||||
(!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
|
||||
|
|
|
@ -553,7 +553,7 @@ xfs_growfs_data_private(
|
|||
error = xfs_free_extent(tp,
|
||||
XFS_AGB_TO_FSB(mp, agno,
|
||||
be32_to_cpu(agf->agf_length) - new),
|
||||
new, &oinfo);
|
||||
new, &oinfo, XFS_AG_RESV_NONE);
|
||||
if (error)
|
||||
goto error0;
|
||||
}
|
||||
|
|
|
@ -331,6 +331,22 @@ xfs_mp_fail_writes(struct xfs_mount *mp)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* per-AG block reservation data structures */
|
||||
/*
 * Selects which per-AG reservation an operation refers to.
 * xfs_perag_resv() maps each value to the matching struct xfs_ag_resv.
 */
enum xfs_ag_resv_type {
	XFS_AG_RESV_NONE	= 0,	/* no reservation; xfs_perag_resv() gives NULL */
	XFS_AG_RESV_METADATA	= 1,	/* pag_meta_resv */
	XFS_AG_RESV_AGFL	= 2,	/* pag_agfl_resv */
};
|
||||
|
||||
struct xfs_ag_resv {
|
||||
/* number of blocks originally reserved here */
|
||||
xfs_extlen_t ar_orig_reserved;
|
||||
/* number of blocks reserved here */
|
||||
xfs_extlen_t ar_reserved;
|
||||
/* number of blocks originally asked for */
|
||||
xfs_extlen_t ar_asked;
|
||||
};
|
||||
|
||||
/*
|
||||
* Per-ag incore structure, copies of information in agf and agi, to improve the
|
||||
* performance of allocation group selection.
|
||||
|
@ -378,8 +394,28 @@ typedef struct xfs_perag {
|
|||
/* for rcu-safe freeing */
|
||||
struct rcu_head rcu_head;
|
||||
int pagb_count; /* pagb slots in use */
|
||||
|
||||
/* Blocks reserved for all kinds of metadata. */
|
||||
struct xfs_ag_resv pag_meta_resv;
|
||||
/* Blocks reserved for just AGFL-based metadata. */
|
||||
struct xfs_ag_resv pag_agfl_resv;
|
||||
} xfs_perag_t;
|
||||
|
||||
static inline struct xfs_ag_resv *
|
||||
xfs_perag_resv(
|
||||
struct xfs_perag *pag,
|
||||
enum xfs_ag_resv_type type)
|
||||
{
|
||||
switch (type) {
|
||||
case XFS_AG_RESV_METADATA:
|
||||
return &pag->pag_meta_resv;
|
||||
case XFS_AG_RESV_AGFL:
|
||||
return &pag->pag_agfl_resv;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
extern void xfs_uuid_table_free(void);
|
||||
extern int xfs_log_sbcount(xfs_mount_t *);
|
||||
extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
|
||||
|
|
|
@ -51,28 +51,16 @@ xfs_rui_item_free(
|
|||
kmem_zone_free(xfs_rui_zone, ruip);
|
||||
}
|
||||
|
||||
/*
|
||||
* This returns the number of iovecs needed to log the given rui item.
|
||||
* We only need 1 iovec for an rui item. It just logs the rui_log_format
|
||||
* structure.
|
||||
*/
|
||||
static inline int
|
||||
xfs_rui_item_sizeof(
|
||||
struct xfs_rui_log_item *ruip)
|
||||
{
|
||||
return sizeof(struct xfs_rui_log_format) +
|
||||
(ruip->rui_format.rui_nextents - 1) *
|
||||
sizeof(struct xfs_map_extent);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_rui_item_size(
|
||||
struct xfs_log_item *lip,
|
||||
int *nvecs,
|
||||
int *nbytes)
|
||||
{
|
||||
struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
|
||||
|
||||
*nvecs += 1;
|
||||
*nbytes += xfs_rui_item_sizeof(RUI_ITEM(lip));
|
||||
*nbytes += xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -97,7 +85,7 @@ xfs_rui_item_format(
|
|||
ruip->rui_format.rui_size = 1;
|
||||
|
||||
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
|
||||
xfs_rui_item_sizeof(ruip));
|
||||
xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -205,16 +193,12 @@ xfs_rui_init(
|
|||
|
||||
{
|
||||
struct xfs_rui_log_item *ruip;
|
||||
uint size;
|
||||
|
||||
ASSERT(nextents > 0);
|
||||
if (nextents > XFS_RUI_MAX_FAST_EXTENTS) {
|
||||
size = (uint)(sizeof(struct xfs_rui_log_item) +
|
||||
((nextents - 1) * sizeof(struct xfs_map_extent)));
|
||||
ruip = kmem_zalloc(size, KM_SLEEP);
|
||||
} else {
|
||||
if (nextents > XFS_RUI_MAX_FAST_EXTENTS)
|
||||
ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), KM_SLEEP);
|
||||
else
|
||||
ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP);
|
||||
}
|
||||
|
||||
xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
|
||||
ruip->rui_format.rui_nextents = nextents;
|
||||
|
@ -239,14 +223,12 @@ xfs_rui_copy_format(
|
|||
uint len;
|
||||
|
||||
src_rui_fmt = buf->i_addr;
|
||||
len = sizeof(struct xfs_rui_log_format) +
|
||||
(src_rui_fmt->rui_nextents - 1) *
|
||||
sizeof(struct xfs_map_extent);
|
||||
len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);
|
||||
|
||||
if (buf->i_len != len)
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
memcpy((char *)dst_rui_fmt, (char *)src_rui_fmt, len);
|
||||
memcpy(dst_rui_fmt, src_rui_fmt, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -70,6 +70,14 @@ struct xfs_rui_log_item {
|
|||
struct xfs_rui_log_format rui_format;
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
xfs_rui_log_item_sizeof(
|
||||
unsigned int nr)
|
||||
{
|
||||
return offsetof(struct xfs_rui_log_item, rui_format) +
|
||||
xfs_rui_log_format_sizeof(nr);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the "rmap update done" log item. It is used to log the fact that
|
||||
* some rmapbt updates mentioned in an earlier rui item have been performed.
|
||||
|
|
|
@ -1782,9 +1782,8 @@ xfs_init_zones(void)
|
|||
if (!xfs_rud_zone)
|
||||
goto out_destroy_icreate_zone;
|
||||
|
||||
xfs_rui_zone = kmem_zone_init((sizeof(struct xfs_rui_log_item) +
|
||||
((XFS_RUI_MAX_FAST_EXTENTS - 1) *
|
||||
sizeof(struct xfs_map_extent))),
|
||||
xfs_rui_zone = kmem_zone_init(
|
||||
xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
|
||||
"xfs_rui_item");
|
||||
if (!xfs_rui_zone)
|
||||
goto out_destroy_rud_zone;
|
||||
|
|
|
@ -1570,14 +1570,15 @@ TRACE_EVENT(xfs_agf,
|
|||
|
||||
TRACE_EVENT(xfs_free_extent,
|
||||
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
|
||||
xfs_extlen_t len, bool isfl, int haveleft, int haveright),
|
||||
TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
|
||||
xfs_extlen_t len, enum xfs_ag_resv_type resv, int haveleft,
|
||||
int haveright),
|
||||
TP_ARGS(mp, agno, agbno, len, resv, haveleft, haveright),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(xfs_agnumber_t, agno)
|
||||
__field(xfs_agblock_t, agbno)
|
||||
__field(xfs_extlen_t, len)
|
||||
__field(int, isfl)
|
||||
__field(int, resv)
|
||||
__field(int, haveleft)
|
||||
__field(int, haveright)
|
||||
),
|
||||
|
@ -1586,16 +1587,16 @@ TRACE_EVENT(xfs_free_extent,
|
|||
__entry->agno = agno;
|
||||
__entry->agbno = agbno;
|
||||
__entry->len = len;
|
||||
__entry->isfl = isfl;
|
||||
__entry->resv = resv;
|
||||
__entry->haveleft = haveleft;
|
||||
__entry->haveright = haveright;
|
||||
),
|
||||
TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
|
||||
TP_printk("dev %d:%d agno %u agbno %u len %u resv %d %s",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->agno,
|
||||
__entry->agbno,
|
||||
__entry->len,
|
||||
__entry->isfl,
|
||||
__entry->resv,
|
||||
__entry->haveleft ?
|
||||
(__entry->haveright ? "both" : "left") :
|
||||
(__entry->haveright ? "right" : "none"))
|
||||
|
@ -1622,7 +1623,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
|
|||
__field(short, otype)
|
||||
__field(char, wasdel)
|
||||
__field(char, wasfromfl)
|
||||
__field(char, isfl)
|
||||
__field(int, resv)
|
||||
__field(char, userdata)
|
||||
__field(xfs_fsblock_t, firstblock)
|
||||
),
|
||||
|
@ -1643,13 +1644,13 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
|
|||
__entry->otype = args->otype;
|
||||
__entry->wasdel = args->wasdel;
|
||||
__entry->wasfromfl = args->wasfromfl;
|
||||
__entry->isfl = args->isfl;
|
||||
__entry->resv = args->resv;
|
||||
__entry->userdata = args->userdata;
|
||||
__entry->firstblock = args->firstblock;
|
||||
),
|
||||
TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
|
||||
"prod %u minleft %u total %u alignment %u minalignslop %u "
|
||||
"len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
|
||||
"len %u type %s otype %s wasdel %d wasfromfl %d resv %d "
|
||||
"userdata %d firstblock 0x%llx",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->agno,
|
||||
|
@ -1667,7 +1668,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
|
|||
__print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
|
||||
__entry->wasdel,
|
||||
__entry->wasfromfl,
|
||||
__entry->isfl,
|
||||
__entry->resv,
|
||||
__entry->userdata,
|
||||
(unsigned long long)__entry->firstblock)
|
||||
)
|
||||
|
@ -2558,6 +2559,60 @@ DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result);
|
|||
DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result);
|
||||
DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result);
|
||||
|
||||
/* per-AG reservation */
|
||||
DECLARE_EVENT_CLASS(xfs_ag_resv_class,
|
||||
TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type resv,
|
||||
xfs_extlen_t len),
|
||||
TP_ARGS(pag, resv, len),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(xfs_agnumber_t, agno)
|
||||
__field(int, resv)
|
||||
__field(xfs_extlen_t, freeblks)
|
||||
__field(xfs_extlen_t, flcount)
|
||||
__field(xfs_extlen_t, reserved)
|
||||
__field(xfs_extlen_t, asked)
|
||||
__field(xfs_extlen_t, len)
|
||||
),
|
||||
TP_fast_assign(
|
||||
struct xfs_ag_resv *r = xfs_perag_resv(pag, resv);
|
||||
|
||||
__entry->dev = pag->pag_mount->m_super->s_dev;
|
||||
__entry->agno = pag->pag_agno;
|
||||
__entry->resv = resv;
|
||||
__entry->freeblks = pag->pagf_freeblks;
|
||||
__entry->flcount = pag->pagf_flcount;
|
||||
__entry->reserved = r ? r->ar_reserved : 0;
|
||||
__entry->asked = r ? r->ar_asked : 0;
|
||||
__entry->len = len;
|
||||
),
|
||||
TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u resv %u ask %u len %u\n",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->agno,
|
||||
__entry->resv,
|
||||
__entry->freeblks,
|
||||
__entry->flcount,
|
||||
__entry->reserved,
|
||||
__entry->asked,
|
||||
__entry->len)
|
||||
)
|
||||
#define DEFINE_AG_RESV_EVENT(name) \
|
||||
DEFINE_EVENT(xfs_ag_resv_class, name, \
|
||||
TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type type, \
|
||||
xfs_extlen_t len), \
|
||||
TP_ARGS(pag, type, len))
|
||||
|
||||
/* per-AG reservation tracepoints */
|
||||
DEFINE_AG_RESV_EVENT(xfs_ag_resv_init);
|
||||
DEFINE_AG_RESV_EVENT(xfs_ag_resv_free);
|
||||
DEFINE_AG_RESV_EVENT(xfs_ag_resv_alloc_extent);
|
||||
DEFINE_AG_RESV_EVENT(xfs_ag_resv_free_extent);
|
||||
DEFINE_AG_RESV_EVENT(xfs_ag_resv_critical);
|
||||
DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed);
|
||||
|
||||
DEFINE_AG_ERROR_EVENT(xfs_ag_resv_free_error);
|
||||
DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error);
|
||||
|
||||
#endif /* _TRACE_XFS_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
|
|
|
@ -318,7 +318,6 @@ xfs_trans_mod_sb(
|
|||
* in-core superblock's counter. This should only
|
||||
* be applied to the on-disk superblock.
|
||||
*/
|
||||
ASSERT(delta < 0);
|
||||
tp->t_res_fdblocks_delta += delta;
|
||||
if (xfs_sb_version_haslazysbcount(&mp->m_sb))
|
||||
flags &= ~XFS_TRANS_SB_DIRTY;
|
||||
|
|
|
@ -79,7 +79,8 @@ xfs_trans_free_extent(
|
|||
|
||||
trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
|
||||
|
||||
error = xfs_free_extent(tp, start_block, ext_len, oinfo);
|
||||
error = xfs_free_extent(tp, start_block, ext_len, oinfo,
|
||||
XFS_AG_RESV_NONE);
|
||||
|
||||
/*
|
||||
* Mark the transaction dirty, even on error. This ensures the
|
||||
|
|
Loading…
Reference in a new issue