xfrm: cache bundles instead of policies for outgoing flows

__xfrm_lookup() is called for each packet transmitted out of
system. The xfrm_find_bundle() does a linear search which can
kill system performance depending on how many bundles are
required per policy.

This modifies __xfrm_lookup() to store bundles directly in
the flow cache. If we did not get a hit, we just create a new
bundle instead of doing slow search. This means that we can now
get multiple xfrm_dst's for same flow (on per-cpu basis).

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Timo Teräs 2010-04-07 00:30:05 +00:00 committed by David S. Miller
parent fe1a5f031e
commit 80c802f307
4 changed files with 385 additions and 393 deletions

View file

@ -267,7 +267,6 @@ struct xfrm_policy_afinfo {
xfrm_address_t *saddr,
xfrm_address_t *daddr);
int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr);
struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
void (*decode_session)(struct sk_buff *skb,
struct flowi *fl,
int reverse);
@ -483,13 +482,13 @@ struct xfrm_policy {
struct timer_list timer;
struct flow_cache_object flo;
atomic_t genid;
u32 priority;
u32 index;
struct xfrm_mark mark;
struct xfrm_selector selector;
struct xfrm_lifetime_cfg lft;
struct xfrm_lifetime_cur curlft;
struct dst_entry *bundles;
struct xfrm_policy_walk_entry walk;
u8 type;
u8 action;
@ -879,11 +878,15 @@ struct xfrm_dst {
struct rt6_info rt6;
} u;
struct dst_entry *route;
struct flow_cache_object flo;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols, num_xfrms;
#ifdef CONFIG_XFRM_SUB_POLICY
struct flowi *origin;
struct xfrm_selector *partner;
#endif
u32 genid;
u32 xfrm_genid;
u32 policy_genid;
u32 route_mtu_cached;
u32 child_mtu_cached;
u32 route_cookie;
@ -893,6 +896,7 @@ struct xfrm_dst {
#ifdef CONFIG_XFRM
static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
{
xfrm_pols_put(xdst->pols, xdst->num_pols);
dst_release(xdst->route);
if (likely(xdst->u.dst.xfrm))
xfrm_state_put(xdst->u.dst.xfrm);

View file

@ -59,27 +59,6 @@ static int xfrm4_get_saddr(struct net *net,
return 0;
}
static struct dst_entry *
__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
{
struct dst_entry *dst;
read_lock_bh(&policy->lock);
for (dst = policy->bundles; dst; dst = dst->next) {
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/
xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
xdst->u.rt.fl.fl4_src == fl->fl4_src &&
xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
dst_clone(dst);
break;
}
}
read_unlock_bh(&policy->lock);
return dst;
}
static int xfrm4_get_tos(struct flowi *fl)
{
return fl->fl4_tos;
@ -259,7 +238,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.dst_ops = &xfrm4_dst_ops,
.dst_lookup = xfrm4_dst_lookup,
.get_saddr = xfrm4_get_saddr,
.find_bundle = __xfrm4_find_bundle,
.decode_session = _decode_session4,
.get_tos = xfrm4_get_tos,
.init_path = xfrm4_init_path,

View file

@ -67,36 +67,6 @@ static int xfrm6_get_saddr(struct net *net,
return 0;
}
static struct dst_entry *
__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
{
struct dst_entry *dst;
/* Still not clear if we should set fl->fl6_{src,dst}... */
read_lock_bh(&policy->lock);
for (dst = policy->bundles; dst; dst = dst->next) {
struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
struct in6_addr fl_dst_prefix, fl_src_prefix;
ipv6_addr_prefix(&fl_dst_prefix,
&fl->fl6_dst,
xdst->u.rt6.rt6i_dst.plen);
ipv6_addr_prefix(&fl_src_prefix,
&fl->fl6_src,
xdst->u.rt6.rt6i_src.plen);
if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
xfrm_bundle_ok(policy, xdst, fl, AF_INET6,
(xdst->u.rt6.rt6i_dst.plen != 128 ||
xdst->u.rt6.rt6i_src.plen != 128))) {
dst_clone(dst);
break;
}
}
read_unlock_bh(&policy->lock);
return dst;
}
static int xfrm6_get_tos(struct flowi *fl)
{
return 0;
@ -291,7 +261,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.dst_ops = &xfrm6_dst_ops,
.dst_lookup = xfrm6_dst_lookup,
.get_saddr = xfrm6_get_saddr,
.find_bundle = __xfrm6_find_bundle,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
.init_path = xfrm6_init_path,

View file

@ -37,6 +37,8 @@
DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);
static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
static struct dst_entry *xfrm_policy_sk_bundles;
static DEFINE_RWLOCK(xfrm_policy_lock);
static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
@ -50,6 +52,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
int dir);
@ -277,8 +280,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
{
BUG_ON(!policy->walk.dead);
BUG_ON(policy->bundles);
if (del_timer(&policy->timer))
BUG();
@ -289,12 +290,7 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
struct dst_entry *dst;
while ((dst = policy->bundles) != NULL) {
policy->bundles = dst->next;
dst_free(dst);
}
atomic_inc(&policy->genid);
if (del_timer(&policy->timer))
atomic_dec(&policy->refcnt);
@ -572,7 +568,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct xfrm_policy *delpol;
struct hlist_head *chain;
struct hlist_node *entry, *newpos;
struct dst_entry *gc_list;
u32 mark = policy->mark.v & policy->mark.m;
write_lock_bh(&xfrm_policy_lock);
@ -622,34 +617,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
else if (xfrm_bydst_should_resize(net, dir, NULL))
schedule_work(&net->xfrm.policy_hash_work);
read_lock_bh(&xfrm_policy_lock);
gc_list = NULL;
entry = &policy->bydst;
hlist_for_each_entry_continue(policy, entry, bydst) {
struct dst_entry *dst;
write_lock(&policy->lock);
dst = policy->bundles;
if (dst) {
struct dst_entry *tail = dst;
while (tail->next)
tail = tail->next;
tail->next = gc_list;
gc_list = dst;
policy->bundles = NULL;
}
write_unlock(&policy->lock);
}
read_unlock_bh(&xfrm_policy_lock);
while (gc_list) {
struct dst_entry *dst = gc_list;
gc_list = dst->next;
dst_free(dst);
}
return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
@ -998,6 +965,19 @@ fail:
return ret;
}
static struct xfrm_policy *
__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
{
#ifdef CONFIG_XFRM_SUB_POLICY
struct xfrm_policy *pol;
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
if (pol != NULL)
return pol;
#endif
return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}
static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
u8 dir, struct flow_cache_object *old_obj, void *ctx)
@ -1007,21 +987,10 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
if (old_obj)
xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
#ifdef CONFIG_XFRM_SUB_POLICY
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
if (IS_ERR(pol))
pol = __xfrm_policy_lookup(net, fl, family, dir);
if (IS_ERR_OR_NULL(pol))
return ERR_CAST(pol);
if (pol)
goto found;
#endif
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
if (IS_ERR(pol))
return ERR_CAST(pol);
if (pol)
goto found;
return NULL;
found:
/* Resolver returns two references:
* one for cache and one for caller of flow_cache_lookup() */
xfrm_pol_hold(pol);
@ -1313,18 +1282,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
* still valid.
*/
static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
struct dst_entry *x;
struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
if (unlikely(afinfo == NULL))
return ERR_PTR(-EINVAL);
x = afinfo->find_bundle(fl, policy);
xfrm_policy_put_afinfo(afinfo);
return x;
}
static inline int xfrm_get_tos(struct flowi *fl, int family)
{
struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@ -1340,6 +1297,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)
return tos;
}
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
{
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
struct dst_entry *dst = &xdst->u.dst;
if (xdst->route == NULL) {
/* Dummy bundle - if it has xfrms we were not
* able to build bundle as template resolution failed.
* It means we need to try again resolving. */
if (xdst->num_xfrms > 0)
return NULL;
} else {
/* Real bundle */
if (stale_bundle(dst))
return NULL;
}
dst_hold(dst);
return flo;
}
static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
{
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
struct dst_entry *dst = &xdst->u.dst;
if (!xdst->route)
return 0;
if (stale_bundle(dst))
return 0;
return 1;
}
static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
{
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
struct dst_entry *dst = &xdst->u.dst;
dst_free(dst);
}
static const struct flow_cache_ops xfrm_bundle_fc_ops = {
.get = xfrm_bundle_flo_get,
.check = xfrm_bundle_flo_check,
.delete = xfrm_bundle_flo_delete,
};
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@ -1362,9 +1367,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
BUG();
}
xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
xfrm_policy_put_afinfo(afinfo);
xdst->flo.ops = &xfrm_bundle_fc_ops;
return xdst;
}
@ -1402,6 +1408,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
return err;
}
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
* all the metrics... Shortly, bundle a bundle.
*/
@ -1465,7 +1472,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst_hold(dst);
dst1->xfrm = xfrm[i];
xdst->genid = xfrm[i]->genid;
xdst->xfrm_genid = xfrm[i]->genid;
dst1->obsolete = -1;
dst1->flags |= DST_HOST;
@ -1558,7 +1565,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
#endif
}
static int stale_bundle(struct dst_entry *dst);
static int xfrm_expand_policies(struct flowi *fl, u16 family,
struct xfrm_policy **pols,
int *num_pols, int *num_xfrms)
{
int i;
if (*num_pols == 0 || !pols[0]) {
*num_pols = 0;
*num_xfrms = 0;
return 0;
}
if (IS_ERR(pols[0]))
return PTR_ERR(pols[0]);
*num_xfrms = pols[0]->xfrm_nr;
#ifdef CONFIG_XFRM_SUB_POLICY
if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
XFRM_POLICY_TYPE_MAIN,
fl, family,
XFRM_POLICY_OUT);
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
return PTR_ERR(pols[1]);
}
(*num_pols) ++;
(*num_xfrms) += pols[1]->xfrm_nr;
}
}
#endif
for (i = 0; i < *num_pols; i++) {
if (pols[i]->action != XFRM_POLICY_ALLOW) {
*num_xfrms = -1;
break;
}
}
return 0;
}
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
struct flowi *fl, u16 family,
struct dst_entry *dst_orig)
{
struct net *net = xp_net(pols[0]);
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
struct dst_entry *dst;
struct xfrm_dst *xdst;
int err;
/* Try to instantiate a bundle */
err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
if (err < 0) {
if (err != -EAGAIN)
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
return ERR_PTR(err);
}
dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
if (IS_ERR(dst)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
return ERR_CAST(dst);
}
xdst = (struct xfrm_dst *)dst;
xdst->num_xfrms = err;
if (num_pols > 1)
err = xfrm_dst_update_parent(dst, &pols[1]->selector);
else
err = xfrm_dst_update_origin(dst, fl);
if (unlikely(err)) {
dst_free(dst);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
return ERR_PTR(err);
}
xdst->num_pols = num_pols;
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
xdst->policy_genid = atomic_read(&pols[0]->genid);
return xdst;
}
static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
struct flow_cache_object *oldflo, void *ctx)
{
struct dst_entry *dst_orig = (struct dst_entry *)ctx;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct xfrm_dst *xdst, *new_xdst;
int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
/* Check if the policies from old bundle are usable */
xdst = NULL;
if (oldflo) {
xdst = container_of(oldflo, struct xfrm_dst, flo);
num_pols = xdst->num_pols;
num_xfrms = xdst->num_xfrms;
pol_dead = 0;
for (i = 0; i < num_pols; i++) {
pols[i] = xdst->pols[i];
pol_dead |= pols[i]->walk.dead;
}
if (pol_dead) {
dst_free(&xdst->u.dst);
xdst = NULL;
num_pols = 0;
num_xfrms = 0;
oldflo = NULL;
}
}
/* Resolve policies to use if we couldn't get them from
* previous cache entry */
if (xdst == NULL) {
num_pols = 1;
pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
goto inc_error;
if (num_pols == 0)
return NULL;
if (num_xfrms <= 0)
goto make_dummy_bundle;
}
new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
if (IS_ERR(new_xdst)) {
err = PTR_ERR(new_xdst);
if (err != -EAGAIN)
goto error;
if (oldflo == NULL)
goto make_dummy_bundle;
dst_hold(&xdst->u.dst);
return oldflo;
}
/* Kill the previous bundle */
if (xdst) {
/* The policies were stolen for newly generated bundle */
xdst->num_pols = 0;
dst_free(&xdst->u.dst);
}
/* Flow cache does not have reference, it dst_free()'s,
* but we do need to return one reference for original caller */
dst_hold(&new_xdst->u.dst);
return &new_xdst->flo;
make_dummy_bundle:
/* We found policies, but there's no bundles to instantiate:
* either because the policy blocks, has no transformations or
* we could not build template (no xfrm_states).*/
xdst = xfrm_alloc_dst(net, family);
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
return ERR_CAST(xdst);
}
xdst->num_pols = num_pols;
xdst->num_xfrms = num_xfrms;
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
dst_hold(&xdst->u.dst);
return &xdst->flo;
inc_error:
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
if (xdst != NULL)
dst_free(&xdst->u.dst);
else
xfrm_pols_put(pols, num_pols);
return ERR_PTR(err);
}
/* Main function: finds/creates a bundle for given flow.
*
@ -1568,248 +1754,152 @@ static int stale_bundle(struct dst_entry *dst);
int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
struct sock *sk, int flags)
{
struct xfrm_policy *policy;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int npols;
int pol_dead;
int xfrm_nr;
int pi;
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
struct dst_entry *dst, *dst_orig = *dst_p;
int nx = 0;
int err;
u32 genid;
u16 family;
struct flow_cache_object *flo;
struct xfrm_dst *xdst;
struct dst_entry *dst, *dst_orig = *dst_p, *route;
u16 family = dst_orig->ops->family;
u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
int i, err, num_pols, num_xfrms, drop_pols = 0;
restart:
genid = atomic_read(&flow_cache_genid);
policy = NULL;
for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
pols[pi] = NULL;
npols = 0;
pol_dead = 0;
xfrm_nr = 0;
dst = NULL;
xdst = NULL;
route = NULL;
if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
err = PTR_ERR(policy);
if (IS_ERR(policy)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
num_pols = 1;
pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
goto dropdst;
if (num_pols) {
if (num_xfrms <= 0) {
drop_pols = num_pols;
goto no_transform;
}
xdst = xfrm_resolve_and_create_bundle(
pols, num_pols, fl,
family, dst_orig);
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
err = PTR_ERR(xdst);
goto dropdst;
}
spin_lock_bh(&xfrm_policy_sk_bundle_lock);
xdst->u.dst.next = xfrm_policy_sk_bundles;
xfrm_policy_sk_bundles = &xdst->u.dst;
spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
route = xdst->route;
}
}
if (!policy) {
struct flow_cache_object *flo;
if (xdst == NULL) {
/* To accelerate a bit... */
if ((dst_orig->flags & DST_NOXFRM) ||
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;
flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
dir, xfrm_policy_lookup, NULL);
err = PTR_ERR(flo);
flo = flow_cache_lookup(net, fl, family, dir,
xfrm_bundle_lookup, dst_orig);
if (flo == NULL)
goto nopol;
if (IS_ERR(flo)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
err = PTR_ERR(flo);
goto dropdst;
}
if (flo)
policy = container_of(flo, struct xfrm_policy, flo);
else
policy = NULL;
xdst = container_of(flo, struct xfrm_dst, flo);
num_pols = xdst->num_pols;
num_xfrms = xdst->num_xfrms;
memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols);
route = xdst->route;
}
if (!policy)
dst = &xdst->u.dst;
if (route == NULL && num_xfrms > 0) {
/* The only case when xfrm_bundle_lookup() returns a
* bundle with null route, is when the template could
* not be resolved. It means policies are there, but
* bundle could not be created, since we don't yet
* have the xfrm_state's. We need to wait for KM to
* negotiate new SA's or bail out with error.*/
if (net->xfrm.sysctl_larval_drop) {
/* EREMOTE tells the caller to generate
* a one-shot blackhole route. */
dst_release(dst);
xfrm_pols_put(pols, num_pols);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
return -EREMOTE;
}
if (flags & XFRM_LOOKUP_WAIT) {
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(&net->xfrm.km_waitq, &wait);
set_current_state(TASK_INTERRUPTIBLE);
schedule();
set_current_state(TASK_RUNNING);
remove_wait_queue(&net->xfrm.km_waitq, &wait);
if (!signal_pending(current)) {
dst_release(dst);
goto restart;
}
err = -ERESTART;
} else
err = -EAGAIN;
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
goto error;
}
no_transform:
if (num_pols == 0)
goto nopol;
family = dst_orig->ops->family;
pols[0] = policy;
npols ++;
xfrm_nr += pols[0]->xfrm_nr;
err = -ENOENT;
if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
if ((flags & XFRM_LOOKUP_ICMP) &&
!(pols[0]->flags & XFRM_POLICY_ICMP)) {
err = -ENOENT;
goto error;
}
policy->curlft.use_time = get_seconds();
for (i = 0; i < num_pols; i++)
pols[i]->curlft.use_time = get_seconds();
switch (policy->action) {
default:
case XFRM_POLICY_BLOCK:
if (num_xfrms < 0) {
/* Prohibit the flow */
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
err = -EPERM;
goto error;
case XFRM_POLICY_ALLOW:
#ifndef CONFIG_XFRM_SUB_POLICY
if (policy->xfrm_nr == 0) {
/* Flow passes not transformed. */
xfrm_pol_put(policy);
return 0;
}
#endif
/* Try to find matching bundle.
*
* LATER: help from flow cache. It is optional, this
* is required only for output policy.
*/
dst = xfrm_find_bundle(fl, policy, family);
if (IS_ERR(dst)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
err = PTR_ERR(dst);
goto error;
}
if (dst)
break;
#ifdef CONFIG_XFRM_SUB_POLICY
if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
pols[1] = xfrm_policy_lookup_bytype(net,
XFRM_POLICY_TYPE_MAIN,
fl, family,
XFRM_POLICY_OUT);
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
err = PTR_ERR(pols[1]);
goto error;
}
if (pols[1]->action == XFRM_POLICY_BLOCK) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
err = -EPERM;
goto error;
}
npols ++;
xfrm_nr += pols[1]->xfrm_nr;
}
}
/*
* Because neither flowi nor bundle information knows about
* transformation template size. On more than one policy usage
* we can realize whether all of them is bypass or not after
* they are searched. See above not-transformed bypass
* is surrounded by non-sub policy configuration, too.
*/
if (xfrm_nr == 0) {
/* Flow passes not transformed. */
xfrm_pols_put(pols, npols);
return 0;
}
#endif
nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
if (unlikely(nx<0)) {
err = nx;
if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
/* EREMOTE tells the caller to generate
* a one-shot blackhole route.
*/
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
xfrm_pol_put(policy);
return -EREMOTE;
}
if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(&net->xfrm.km_waitq, &wait);
set_current_state(TASK_INTERRUPTIBLE);
schedule();
set_current_state(TASK_RUNNING);
remove_wait_queue(&net->xfrm.km_waitq, &wait);
nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
if (nx == -EAGAIN && signal_pending(current)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
err = -ERESTART;
goto error;
}
if (nx == -EAGAIN ||
genid != atomic_read(&flow_cache_genid)) {
xfrm_pols_put(pols, npols);
goto restart;
}
err = nx;
}
if (err < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
goto error;
}
}
if (nx == 0) {
/* Flow passes not transformed. */
xfrm_pols_put(pols, npols);
return 0;
}
dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
err = PTR_ERR(dst);
if (IS_ERR(dst)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
goto error;
}
for (pi = 0; pi < npols; pi++)
pol_dead |= pols[pi]->walk.dead;
write_lock_bh(&policy->lock);
if (unlikely(pol_dead || stale_bundle(dst))) {
/* Wow! While we worked on resolving, this
* policy has gone. Retry. It is not paranoia,
* we just cannot enlist new bundle to dead object.
* We can't enlist stable bundles either.
*/
write_unlock_bh(&policy->lock);
dst_free(dst);
if (pol_dead)
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
else
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
err = -EHOSTUNREACH;
goto error;
}
if (npols > 1)
err = xfrm_dst_update_parent(dst, &pols[1]->selector);
else
err = xfrm_dst_update_origin(dst, fl);
if (unlikely(err)) {
write_unlock_bh(&policy->lock);
dst_free(dst);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
goto error;
}
dst->next = policy->bundles;
policy->bundles = dst;
dst_hold(dst);
write_unlock_bh(&policy->lock);
} else if (num_xfrms > 0) {
/* Flow transformed */
*dst_p = dst;
dst_release(dst_orig);
} else {
/* Flow passes untransformed */
dst_release(dst);
}
*dst_p = dst;
dst_release(dst_orig);
xfrm_pols_put(pols, npols);
ok:
xfrm_pols_put(pols, drop_pols);
return 0;
nopol:
if (!(flags & XFRM_LOOKUP_ICMP))
goto ok;
err = -ENOENT;
error:
xfrm_pols_put(pols, npols);
dst_release(dst);
dropdst:
dst_release(dst_orig);
*dst_p = NULL;
xfrm_pols_put(pols, drop_pols);
return err;
nopol:
err = -ENOENT;
if (flags & XFRM_LOOKUP_ICMP)
goto dropdst;
return 0;
}
EXPORT_SYMBOL(__xfrm_lookup);
@ -2161,69 +2251,22 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst;
}
static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
{
struct dst_entry *dst, **dstp;
write_lock(&pol->lock);
dstp = &pol->bundles;
while ((dst=*dstp) != NULL) {
if (func(dst)) {
*dstp = dst->next;
dst->next = *gc_list_p;
*gc_list_p = dst;
} else {
dstp = &dst->next;
}
}
write_unlock(&pol->lock);
}
static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
{
struct dst_entry *gc_list = NULL;
int dir;
read_lock_bh(&xfrm_policy_lock);
for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
struct xfrm_policy *pol;
struct hlist_node *entry;
struct hlist_head *table;
int i;
hlist_for_each_entry(pol, entry,
&net->xfrm.policy_inexact[dir], bydst)
prune_one_bundle(pol, func, &gc_list);
table = net->xfrm.policy_bydst[dir].table;
for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
hlist_for_each_entry(pol, entry, table + i, bydst)
prune_one_bundle(pol, func, &gc_list);
}
}
read_unlock_bh(&xfrm_policy_lock);
while (gc_list) {
struct dst_entry *dst = gc_list;
gc_list = dst->next;
dst_free(dst);
}
}
static int unused_bundle(struct dst_entry *dst)
{
return !atomic_read(&dst->__refcnt);
}
static void __xfrm_garbage_collect(struct net *net)
{
xfrm_prune_bundles(net, unused_bundle);
}
struct dst_entry *head, *next;
static int xfrm_flush_bundles(struct net *net)
{
xfrm_prune_bundles(net, stale_bundle);
return 0;
flow_cache_flush();
spin_lock_bh(&xfrm_policy_sk_bundle_lock);
head = xfrm_policy_sk_bundles;
xfrm_policy_sk_bundles = NULL;
spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
while (head) {
next = head->next;
dst_free(head);
head = next;
}
}
static void xfrm_init_pmtu(struct dst_entry *dst)
@ -2283,7 +2326,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
return 0;
if (dst->xfrm->km.state != XFRM_STATE_VALID)
return 0;
if (xdst->genid != dst->xfrm->genid)
if (xdst->xfrm_genid != dst->xfrm->genid)
return 0;
if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
return 0;
if (strict && fl &&
@ -2448,7 +2493,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
switch (event) {
case NETDEV_DOWN:
xfrm_flush_bundles(dev_net(dev));
__xfrm_garbage_collect(dev_net(dev));
}
return NOTIFY_DONE;
}
@ -2780,7 +2825,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
struct xfrm_migrate *m, int num_migrate)
{
struct xfrm_migrate *mp;
struct dst_entry *dst;
int i, j, n = 0;
write_lock_bh(&pol->lock);
@ -2805,10 +2849,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
sizeof(pol->xfrm_vec[i].saddr));
pol->xfrm_vec[i].encap_family = mp->new_family;
/* flush bundles */
while ((dst = pol->bundles) != NULL) {
pol->bundles = dst->next;
dst_free(dst);
}
atomic_inc(&pol->genid);
}
}