xfrm: cache bundles instead of policies for outgoing flows
__xfrm_lookup() is called for each packet transmitted out of the system. xfrm_find_bundle() does a linear search, which can kill system performance depending on how many bundles are required per policy. This modifies __xfrm_lookup() to store bundles directly in the flow cache. If we do not get a hit, we just create a new bundle instead of doing a slow search. This means that we can now get multiple xfrm_dst's for the same flow (on a per-cpu basis). Signed-off-by: Timo Teras <timo.teras@iki.fi> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
fe1a5f031e
commit
80c802f307
4 changed files with 385 additions and 393 deletions
|
@ -267,7 +267,6 @@ struct xfrm_policy_afinfo {
|
|||
xfrm_address_t *saddr,
|
||||
xfrm_address_t *daddr);
|
||||
int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr);
|
||||
struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
|
||||
void (*decode_session)(struct sk_buff *skb,
|
||||
struct flowi *fl,
|
||||
int reverse);
|
||||
|
@ -483,13 +482,13 @@ struct xfrm_policy {
|
|||
struct timer_list timer;
|
||||
|
||||
struct flow_cache_object flo;
|
||||
atomic_t genid;
|
||||
u32 priority;
|
||||
u32 index;
|
||||
struct xfrm_mark mark;
|
||||
struct xfrm_selector selector;
|
||||
struct xfrm_lifetime_cfg lft;
|
||||
struct xfrm_lifetime_cur curlft;
|
||||
struct dst_entry *bundles;
|
||||
struct xfrm_policy_walk_entry walk;
|
||||
u8 type;
|
||||
u8 action;
|
||||
|
@ -879,11 +878,15 @@ struct xfrm_dst {
|
|||
struct rt6_info rt6;
|
||||
} u;
|
||||
struct dst_entry *route;
|
||||
struct flow_cache_object flo;
|
||||
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
|
||||
int num_pols, num_xfrms;
|
||||
#ifdef CONFIG_XFRM_SUB_POLICY
|
||||
struct flowi *origin;
|
||||
struct xfrm_selector *partner;
|
||||
#endif
|
||||
u32 genid;
|
||||
u32 xfrm_genid;
|
||||
u32 policy_genid;
|
||||
u32 route_mtu_cached;
|
||||
u32 child_mtu_cached;
|
||||
u32 route_cookie;
|
||||
|
@ -893,6 +896,7 @@ struct xfrm_dst {
|
|||
#ifdef CONFIG_XFRM
|
||||
static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
|
||||
{
|
||||
xfrm_pols_put(xdst->pols, xdst->num_pols);
|
||||
dst_release(xdst->route);
|
||||
if (likely(xdst->u.dst.xfrm))
|
||||
xfrm_state_put(xdst->u.dst.xfrm);
|
||||
|
|
|
@ -59,27 +59,6 @@ static int xfrm4_get_saddr(struct net *net,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct dst_entry *
|
||||
__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
|
||||
{
|
||||
struct dst_entry *dst;
|
||||
|
||||
read_lock_bh(&policy->lock);
|
||||
for (dst = policy->bundles; dst; dst = dst->next) {
|
||||
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
|
||||
if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/
|
||||
xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
|
||||
xdst->u.rt.fl.fl4_src == fl->fl4_src &&
|
||||
xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
|
||||
xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
|
||||
dst_clone(dst);
|
||||
break;
|
||||
}
|
||||
}
|
||||
read_unlock_bh(&policy->lock);
|
||||
return dst;
|
||||
}
|
||||
|
||||
static int xfrm4_get_tos(struct flowi *fl)
|
||||
{
|
||||
return fl->fl4_tos;
|
||||
|
@ -259,7 +238,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
|
|||
.dst_ops = &xfrm4_dst_ops,
|
||||
.dst_lookup = xfrm4_dst_lookup,
|
||||
.get_saddr = xfrm4_get_saddr,
|
||||
.find_bundle = __xfrm4_find_bundle,
|
||||
.decode_session = _decode_session4,
|
||||
.get_tos = xfrm4_get_tos,
|
||||
.init_path = xfrm4_init_path,
|
||||
|
|
|
@ -67,36 +67,6 @@ static int xfrm6_get_saddr(struct net *net,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct dst_entry *
|
||||
__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
|
||||
{
|
||||
struct dst_entry *dst;
|
||||
|
||||
/* Still not clear if we should set fl->fl6_{src,dst}... */
|
||||
read_lock_bh(&policy->lock);
|
||||
for (dst = policy->bundles; dst; dst = dst->next) {
|
||||
struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
|
||||
struct in6_addr fl_dst_prefix, fl_src_prefix;
|
||||
|
||||
ipv6_addr_prefix(&fl_dst_prefix,
|
||||
&fl->fl6_dst,
|
||||
xdst->u.rt6.rt6i_dst.plen);
|
||||
ipv6_addr_prefix(&fl_src_prefix,
|
||||
&fl->fl6_src,
|
||||
xdst->u.rt6.rt6i_src.plen);
|
||||
if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
|
||||
ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
|
||||
xfrm_bundle_ok(policy, xdst, fl, AF_INET6,
|
||||
(xdst->u.rt6.rt6i_dst.plen != 128 ||
|
||||
xdst->u.rt6.rt6i_src.plen != 128))) {
|
||||
dst_clone(dst);
|
||||
break;
|
||||
}
|
||||
}
|
||||
read_unlock_bh(&policy->lock);
|
||||
return dst;
|
||||
}
|
||||
|
||||
static int xfrm6_get_tos(struct flowi *fl)
|
||||
{
|
||||
return 0;
|
||||
|
@ -291,7 +261,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
|
|||
.dst_ops = &xfrm6_dst_ops,
|
||||
.dst_lookup = xfrm6_dst_lookup,
|
||||
.get_saddr = xfrm6_get_saddr,
|
||||
.find_bundle = __xfrm6_find_bundle,
|
||||
.decode_session = _decode_session6,
|
||||
.get_tos = xfrm6_get_tos,
|
||||
.init_path = xfrm6_init_path,
|
||||
|
|
|
@ -37,6 +37,8 @@
|
|||
DEFINE_MUTEX(xfrm_cfg_mutex);
|
||||
EXPORT_SYMBOL(xfrm_cfg_mutex);
|
||||
|
||||
static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
|
||||
static struct dst_entry *xfrm_policy_sk_bundles;
|
||||
static DEFINE_RWLOCK(xfrm_policy_lock);
|
||||
|
||||
static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
|
||||
|
@ -50,6 +52,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
|
|||
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
|
||||
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
|
||||
static void xfrm_init_pmtu(struct dst_entry *dst);
|
||||
static int stale_bundle(struct dst_entry *dst);
|
||||
|
||||
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
|
||||
int dir);
|
||||
|
@ -277,8 +280,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
|
|||
{
|
||||
BUG_ON(!policy->walk.dead);
|
||||
|
||||
BUG_ON(policy->bundles);
|
||||
|
||||
if (del_timer(&policy->timer))
|
||||
BUG();
|
||||
|
||||
|
@ -289,12 +290,7 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
|
|||
|
||||
static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
|
||||
{
|
||||
struct dst_entry *dst;
|
||||
|
||||
while ((dst = policy->bundles) != NULL) {
|
||||
policy->bundles = dst->next;
|
||||
dst_free(dst);
|
||||
}
|
||||
atomic_inc(&policy->genid);
|
||||
|
||||
if (del_timer(&policy->timer))
|
||||
atomic_dec(&policy->refcnt);
|
||||
|
@ -572,7 +568,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
|
|||
struct xfrm_policy *delpol;
|
||||
struct hlist_head *chain;
|
||||
struct hlist_node *entry, *newpos;
|
||||
struct dst_entry *gc_list;
|
||||
u32 mark = policy->mark.v & policy->mark.m;
|
||||
|
||||
write_lock_bh(&xfrm_policy_lock);
|
||||
|
@ -622,34 +617,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
|
|||
else if (xfrm_bydst_should_resize(net, dir, NULL))
|
||||
schedule_work(&net->xfrm.policy_hash_work);
|
||||
|
||||
read_lock_bh(&xfrm_policy_lock);
|
||||
gc_list = NULL;
|
||||
entry = &policy->bydst;
|
||||
hlist_for_each_entry_continue(policy, entry, bydst) {
|
||||
struct dst_entry *dst;
|
||||
|
||||
write_lock(&policy->lock);
|
||||
dst = policy->bundles;
|
||||
if (dst) {
|
||||
struct dst_entry *tail = dst;
|
||||
while (tail->next)
|
||||
tail = tail->next;
|
||||
tail->next = gc_list;
|
||||
gc_list = dst;
|
||||
|
||||
policy->bundles = NULL;
|
||||
}
|
||||
write_unlock(&policy->lock);
|
||||
}
|
||||
read_unlock_bh(&xfrm_policy_lock);
|
||||
|
||||
while (gc_list) {
|
||||
struct dst_entry *dst = gc_list;
|
||||
|
||||
gc_list = dst->next;
|
||||
dst_free(dst);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(xfrm_policy_insert);
|
||||
|
@ -998,6 +965,19 @@ fail:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static struct xfrm_policy *
|
||||
__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
|
||||
{
|
||||
#ifdef CONFIG_XFRM_SUB_POLICY
|
||||
struct xfrm_policy *pol;
|
||||
|
||||
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
|
||||
if (pol != NULL)
|
||||
return pol;
|
||||
#endif
|
||||
return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
|
||||
}
|
||||
|
||||
static struct flow_cache_object *
|
||||
xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
|
||||
u8 dir, struct flow_cache_object *old_obj, void *ctx)
|
||||
|
@ -1007,21 +987,10 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
|
|||
if (old_obj)
|
||||
xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
|
||||
|
||||
#ifdef CONFIG_XFRM_SUB_POLICY
|
||||
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
|
||||
if (IS_ERR(pol))
|
||||
pol = __xfrm_policy_lookup(net, fl, family, dir);
|
||||
if (IS_ERR_OR_NULL(pol))
|
||||
return ERR_CAST(pol);
|
||||
if (pol)
|
||||
goto found;
|
||||
#endif
|
||||
pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
|
||||
if (IS_ERR(pol))
|
||||
return ERR_CAST(pol);
|
||||
if (pol)
|
||||
goto found;
|
||||
return NULL;
|
||||
|
||||
found:
|
||||
/* Resolver returns two references:
|
||||
* one for cache and one for caller of flow_cache_lookup() */
|
||||
xfrm_pol_hold(pol);
|
||||
|
@ -1313,18 +1282,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
|
|||
* still valid.
|
||||
*/
|
||||
|
||||
static struct dst_entry *
|
||||
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
|
||||
{
|
||||
struct dst_entry *x;
|
||||
struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
|
||||
if (unlikely(afinfo == NULL))
|
||||
return ERR_PTR(-EINVAL);
|
||||
x = afinfo->find_bundle(fl, policy);
|
||||
xfrm_policy_put_afinfo(afinfo);
|
||||
return x;
|
||||
}
|
||||
|
||||
static inline int xfrm_get_tos(struct flowi *fl, int family)
|
||||
{
|
||||
struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
|
||||
|
@ -1340,6 +1297,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)
|
|||
return tos;
|
||||
}
|
||||
|
||||
static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
|
||||
{
|
||||
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
|
||||
struct dst_entry *dst = &xdst->u.dst;
|
||||
|
||||
if (xdst->route == NULL) {
|
||||
/* Dummy bundle - if it has xfrms we were not
|
||||
* able to build bundle as template resolution failed.
|
||||
* It means we need to try again resolving. */
|
||||
if (xdst->num_xfrms > 0)
|
||||
return NULL;
|
||||
} else {
|
||||
/* Real bundle */
|
||||
if (stale_bundle(dst))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
dst_hold(dst);
|
||||
return flo;
|
||||
}
|
||||
|
||||
static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
|
||||
{
|
||||
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
|
||||
struct dst_entry *dst = &xdst->u.dst;
|
||||
|
||||
if (!xdst->route)
|
||||
return 0;
|
||||
if (stale_bundle(dst))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
|
||||
{
|
||||
struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
|
||||
struct dst_entry *dst = &xdst->u.dst;
|
||||
|
||||
dst_free(dst);
|
||||
}
|
||||
|
||||
/*
 * Flow cache callbacks for cached bundles (struct xfrm_dst):
 *   .get    - xfrm_bundle_flo_get
 *   .check  - xfrm_bundle_flo_check
 *   .delete - xfrm_bundle_flo_delete
 */
static const struct flow_cache_ops xfrm_bundle_fc_ops = {
|
||||
.get = xfrm_bundle_flo_get,
|
||||
.check = xfrm_bundle_flo_check,
|
||||
.delete = xfrm_bundle_flo_delete,
|
||||
};
|
||||
|
||||
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
|
||||
{
|
||||
struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
|
||||
|
@ -1362,9 +1367,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
|
|||
BUG();
|
||||
}
|
||||
xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
|
||||
|
||||
xfrm_policy_put_afinfo(afinfo);
|
||||
|
||||
xdst->flo.ops = &xfrm_bundle_fc_ops;
|
||||
|
||||
return xdst;
|
||||
}
|
||||
|
||||
|
@ -1402,6 +1408,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
|
|||
return err;
|
||||
}
|
||||
|
||||
|
||||
/* Allocate chain of dst_entry's, attach known xfrm's, calculate
|
||||
* all the metrics... Shortly, bundle a bundle.
|
||||
*/
|
||||
|
@ -1465,7 +1472,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
|
|||
dst_hold(dst);
|
||||
|
||||
dst1->xfrm = xfrm[i];
|
||||
xdst->genid = xfrm[i]->genid;
|
||||
xdst->xfrm_genid = xfrm[i]->genid;
|
||||
|
||||
dst1->obsolete = -1;
|
||||
dst1->flags |= DST_HOST;
|
||||
|
@ -1558,7 +1565,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
|
|||
#endif
|
||||
}
|
||||
|
||||
static int stale_bundle(struct dst_entry *dst);
|
||||
/*
 * Normalise the policy set found for a flow and count its transforms.
 *
 * On entry pols[0] holds the looked-up policy (possibly NULL or an error
 * pointer) and *num_pols the number of entries.  On return:
 *  - no policy:      *num_pols = *num_xfrms = 0, returns 0;
 *  - pols[0] error:  returns PTR_ERR(pols[0]);
 *  - otherwise:      *num_xfrms is the summed xfrm_nr of the policies, or
 *                    -1 if any of them has an action other than
 *                    XFRM_POLICY_ALLOW; returns 0.
 *
 * With CONFIG_XFRM_SUB_POLICY, an allowing non-MAIN pols[0] also triggers
 * a MAIN lookup into pols[1]; if that lookup fails the policies are put
 * and its error code is returned.
 */
static int xfrm_expand_policies(struct flowi *fl, u16 family,
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	struct xfrm_policy *first = pols[0];
	int idx;

	if (*num_pols == 0 || first == NULL) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	if (IS_ERR(first))
		return PTR_ERR(first);

	*num_xfrms = first->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (first->action == XFRM_POLICY_ALLOW &&
	    first->type != XFRM_POLICY_TYPE_MAIN) {
		struct xfrm_policy *main_pol;

		main_pol = xfrm_policy_lookup_bytype(xp_net(first),
						     XFRM_POLICY_TYPE_MAIN,
						     fl, family,
						     XFRM_POLICY_OUT);
		pols[1] = main_pol;
		if (main_pol != NULL) {
			if (IS_ERR(main_pol)) {
				xfrm_pols_put(pols, *num_pols);
				return PTR_ERR(main_pol);
			}
			*num_pols += 1;
			*num_xfrms += main_pol->xfrm_nr;
		}
	}
#endif
	/* A single non-ALLOW policy marks the whole set with -1. */
	idx = 0;
	while (idx < *num_pols) {
		if (pols[idx]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
		idx++;
	}

	return 0;
}
|
||||
|
||||
static struct xfrm_dst *
|
||||
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
|
||||
struct flowi *fl, u16 family,
|
||||
struct dst_entry *dst_orig)
|
||||
{
|
||||
struct net *net = xp_net(pols[0]);
|
||||
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
|
||||
struct dst_entry *dst;
|
||||
struct xfrm_dst *xdst;
|
||||
int err;
|
||||
|
||||
/* Try to instantiate a bundle */
|
||||
err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
|
||||
if (err < 0) {
|
||||
if (err != -EAGAIN)
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
|
||||
if (IS_ERR(dst)) {
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
|
||||
return ERR_CAST(dst);
|
||||
}
|
||||
|
||||
xdst = (struct xfrm_dst *)dst;
|
||||
xdst->num_xfrms = err;
|
||||
if (num_pols > 1)
|
||||
err = xfrm_dst_update_parent(dst, &pols[1]->selector);
|
||||
else
|
||||
err = xfrm_dst_update_origin(dst, fl);
|
||||
if (unlikely(err)) {
|
||||
dst_free(dst);
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
xdst->num_pols = num_pols;
|
||||
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
|
||||
xdst->policy_genid = atomic_read(&pols[0]->genid);
|
||||
|
||||
return xdst;
|
||||
}
|
||||
|
||||
static struct flow_cache_object *
|
||||
xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
|
||||
struct flow_cache_object *oldflo, void *ctx)
|
||||
{
|
||||
struct dst_entry *dst_orig = (struct dst_entry *)ctx;
|
||||
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
|
||||
struct xfrm_dst *xdst, *new_xdst;
|
||||
int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
|
||||
|
||||
/* Check if the policies from old bundle are usable */
|
||||
xdst = NULL;
|
||||
if (oldflo) {
|
||||
xdst = container_of(oldflo, struct xfrm_dst, flo);
|
||||
num_pols = xdst->num_pols;
|
||||
num_xfrms = xdst->num_xfrms;
|
||||
pol_dead = 0;
|
||||
for (i = 0; i < num_pols; i++) {
|
||||
pols[i] = xdst->pols[i];
|
||||
pol_dead |= pols[i]->walk.dead;
|
||||
}
|
||||
if (pol_dead) {
|
||||
dst_free(&xdst->u.dst);
|
||||
xdst = NULL;
|
||||
num_pols = 0;
|
||||
num_xfrms = 0;
|
||||
oldflo = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Resolve policies to use if we couldn't get them from
|
||||
* previous cache entry */
|
||||
if (xdst == NULL) {
|
||||
num_pols = 1;
|
||||
pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
|
||||
err = xfrm_expand_policies(fl, family, pols,
|
||||
&num_pols, &num_xfrms);
|
||||
if (err < 0)
|
||||
goto inc_error;
|
||||
if (num_pols == 0)
|
||||
return NULL;
|
||||
if (num_xfrms <= 0)
|
||||
goto make_dummy_bundle;
|
||||
}
|
||||
|
||||
new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
|
||||
if (IS_ERR(new_xdst)) {
|
||||
err = PTR_ERR(new_xdst);
|
||||
if (err != -EAGAIN)
|
||||
goto error;
|
||||
if (oldflo == NULL)
|
||||
goto make_dummy_bundle;
|
||||
dst_hold(&xdst->u.dst);
|
||||
return oldflo;
|
||||
}
|
||||
|
||||
/* Kill the previous bundle */
|
||||
if (xdst) {
|
||||
/* The policies were stolen for newly generated bundle */
|
||||
xdst->num_pols = 0;
|
||||
dst_free(&xdst->u.dst);
|
||||
}
|
||||
|
||||
/* Flow cache does not have reference, it dst_free()'s,
|
||||
* but we do need to return one reference for original caller */
|
||||
dst_hold(&new_xdst->u.dst);
|
||||
return &new_xdst->flo;
|
||||
|
||||
make_dummy_bundle:
|
||||
/* We found policies, but there's no bundles to instantiate:
|
||||
* either because the policy blocks, has no transformations or
|
||||
* we could not build template (no xfrm_states).*/
|
||||
xdst = xfrm_alloc_dst(net, family);
|
||||
if (IS_ERR(xdst)) {
|
||||
xfrm_pols_put(pols, num_pols);
|
||||
return ERR_CAST(xdst);
|
||||
}
|
||||
xdst->num_pols = num_pols;
|
||||
xdst->num_xfrms = num_xfrms;
|
||||
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
|
||||
|
||||
dst_hold(&xdst->u.dst);
|
||||
return &xdst->flo;
|
||||
|
||||
inc_error:
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
|
||||
error:
|
||||
if (xdst != NULL)
|
||||
dst_free(&xdst->u.dst);
|
||||
else
|
||||
xfrm_pols_put(pols, num_pols);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
/* Main function: finds/creates a bundle for given flow.
|
||||
*
|
||||
|
@ -1568,248 +1754,152 @@ static int stale_bundle(struct dst_entry *dst);
|
|||
int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
|
||||
struct sock *sk, int flags)
|
||||
{
|
||||
struct xfrm_policy *policy;
|
||||
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
|
||||
int npols;
|
||||
int pol_dead;
|
||||
int xfrm_nr;
|
||||
int pi;
|
||||
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
|
||||
struct dst_entry *dst, *dst_orig = *dst_p;
|
||||
int nx = 0;
|
||||
int err;
|
||||
u32 genid;
|
||||
u16 family;
|
||||
struct flow_cache_object *flo;
|
||||
struct xfrm_dst *xdst;
|
||||
struct dst_entry *dst, *dst_orig = *dst_p, *route;
|
||||
u16 family = dst_orig->ops->family;
|
||||
u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
|
||||
int i, err, num_pols, num_xfrms, drop_pols = 0;
|
||||
|
||||
restart:
|
||||
genid = atomic_read(&flow_cache_genid);
|
||||
policy = NULL;
|
||||
for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
|
||||
pols[pi] = NULL;
|
||||
npols = 0;
|
||||
pol_dead = 0;
|
||||
xfrm_nr = 0;
|
||||
dst = NULL;
|
||||
xdst = NULL;
|
||||
route = NULL;
|
||||
|
||||
if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
|
||||
policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
|
||||
err = PTR_ERR(policy);
|
||||
if (IS_ERR(policy)) {
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
|
||||
num_pols = 1;
|
||||
pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
|
||||
err = xfrm_expand_policies(fl, family, pols,
|
||||
&num_pols, &num_xfrms);
|
||||
if (err < 0)
|
||||
goto dropdst;
|
||||
|
||||
if (num_pols) {
|
||||
if (num_xfrms <= 0) {
|
||||
drop_pols = num_pols;
|
||||
goto no_transform;
|
||||
}
|
||||
|
||||
xdst = xfrm_resolve_and_create_bundle(
|
||||
pols, num_pols, fl,
|
||||
family, dst_orig);
|
||||
if (IS_ERR(xdst)) {
|
||||
xfrm_pols_put(pols, num_pols);
|
||||
err = PTR_ERR(xdst);
|
||||
goto dropdst;
|
||||
}
|
||||
|
||||
spin_lock_bh(&xfrm_policy_sk_bundle_lock);
|
||||
xdst->u.dst.next = xfrm_policy_sk_bundles;
|
||||
xfrm_policy_sk_bundles = &xdst->u.dst;
|
||||
spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
|
||||
|
||||
route = xdst->route;
|
||||
}
|
||||
}
|
||||
|
||||
if (!policy) {
|
||||
struct flow_cache_object *flo;
|
||||
|
||||
if (xdst == NULL) {
|
||||
/* To accelerate a bit... */
|
||||
if ((dst_orig->flags & DST_NOXFRM) ||
|
||||
!net->xfrm.policy_count[XFRM_POLICY_OUT])
|
||||
goto nopol;
|
||||
|
||||
flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
|
||||
dir, xfrm_policy_lookup, NULL);
|
||||
err = PTR_ERR(flo);
|
||||
flo = flow_cache_lookup(net, fl, family, dir,
|
||||
xfrm_bundle_lookup, dst_orig);
|
||||
if (flo == NULL)
|
||||
goto nopol;
|
||||
if (IS_ERR(flo)) {
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
|
||||
err = PTR_ERR(flo);
|
||||
goto dropdst;
|
||||
}
|
||||
if (flo)
|
||||
policy = container_of(flo, struct xfrm_policy, flo);
|
||||
else
|
||||
policy = NULL;
|
||||
xdst = container_of(flo, struct xfrm_dst, flo);
|
||||
|
||||
num_pols = xdst->num_pols;
|
||||
num_xfrms = xdst->num_xfrms;
|
||||
memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols);
|
||||
route = xdst->route;
|
||||
}
|
||||
|
||||
if (!policy)
|
||||
dst = &xdst->u.dst;
|
||||
if (route == NULL && num_xfrms > 0) {
|
||||
/* The only case when xfrm_bundle_lookup() returns a
|
||||
* bundle with null route, is when the template could
|
||||
* not be resolved. It means policies are there, but
|
||||
* bundle could not be created, since we don't yet
|
||||
* have the xfrm_state's. We need to wait for KM to
|
||||
* negotiate new SA's or bail out with error.*/
|
||||
if (net->xfrm.sysctl_larval_drop) {
|
||||
/* EREMOTE tells the caller to generate
|
||||
* a one-shot blackhole route. */
|
||||
dst_release(dst);
|
||||
xfrm_pols_put(pols, num_pols);
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
|
||||
return -EREMOTE;
|
||||
}
|
||||
if (flags & XFRM_LOOKUP_WAIT) {
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
add_wait_queue(&net->xfrm.km_waitq, &wait);
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule();
|
||||
set_current_state(TASK_RUNNING);
|
||||
remove_wait_queue(&net->xfrm.km_waitq, &wait);
|
||||
|
||||
if (!signal_pending(current)) {
|
||||
dst_release(dst);
|
||||
goto restart;
|
||||
}
|
||||
|
||||
err = -ERESTART;
|
||||
} else
|
||||
err = -EAGAIN;
|
||||
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
|
||||
goto error;
|
||||
}
|
||||
|
||||
no_transform:
|
||||
if (num_pols == 0)
|
||||
goto nopol;
|
||||
|
||||
family = dst_orig->ops->family;
|
||||
pols[0] = policy;
|
||||
npols ++;
|
||||
xfrm_nr += pols[0]->xfrm_nr;
|
||||
|
||||
err = -ENOENT;
|
||||
if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
|
||||
if ((flags & XFRM_LOOKUP_ICMP) &&
|
||||
!(pols[0]->flags & XFRM_POLICY_ICMP)) {
|
||||
err = -ENOENT;
|
||||
goto error;
|
||||
}
|
||||
|
||||
policy->curlft.use_time = get_seconds();
|
||||
for (i = 0; i < num_pols; i++)
|
||||
pols[i]->curlft.use_time = get_seconds();
|
||||
|
||||
switch (policy->action) {
|
||||
default:
|
||||
case XFRM_POLICY_BLOCK:
|
||||
if (num_xfrms < 0) {
|
||||
/* Prohibit the flow */
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
|
||||
err = -EPERM;
|
||||
goto error;
|
||||
|
||||
case XFRM_POLICY_ALLOW:
|
||||
#ifndef CONFIG_XFRM_SUB_POLICY
|
||||
if (policy->xfrm_nr == 0) {
|
||||
/* Flow passes not transformed. */
|
||||
xfrm_pol_put(policy);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Try to find matching bundle.
|
||||
*
|
||||
* LATER: help from flow cache. It is optional, this
|
||||
* is required only for output policy.
|
||||
*/
|
||||
dst = xfrm_find_bundle(fl, policy, family);
|
||||
if (IS_ERR(dst)) {
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
|
||||
err = PTR_ERR(dst);
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (dst)
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_XFRM_SUB_POLICY
|
||||
if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
|
||||
pols[1] = xfrm_policy_lookup_bytype(net,
|
||||
XFRM_POLICY_TYPE_MAIN,
|
||||
fl, family,
|
||||
XFRM_POLICY_OUT);
|
||||
if (pols[1]) {
|
||||
if (IS_ERR(pols[1])) {
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
|
||||
err = PTR_ERR(pols[1]);
|
||||
goto error;
|
||||
}
|
||||
if (pols[1]->action == XFRM_POLICY_BLOCK) {
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
|
||||
err = -EPERM;
|
||||
goto error;
|
||||
}
|
||||
npols ++;
|
||||
xfrm_nr += pols[1]->xfrm_nr;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Because neither flowi nor bundle information knows about
|
||||
* transformation template size. On more than one policy usage
|
||||
* we can realize whether all of them is bypass or not after
|
||||
* they are searched. See above not-transformed bypass
|
||||
* is surrounded by non-sub policy configuration, too.
|
||||
*/
|
||||
if (xfrm_nr == 0) {
|
||||
/* Flow passes not transformed. */
|
||||
xfrm_pols_put(pols, npols);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
|
||||
|
||||
if (unlikely(nx<0)) {
|
||||
err = nx;
|
||||
if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
|
||||
/* EREMOTE tells the caller to generate
|
||||
* a one-shot blackhole route.
|
||||
*/
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
|
||||
xfrm_pol_put(policy);
|
||||
return -EREMOTE;
|
||||
}
|
||||
if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
add_wait_queue(&net->xfrm.km_waitq, &wait);
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule();
|
||||
set_current_state(TASK_RUNNING);
|
||||
remove_wait_queue(&net->xfrm.km_waitq, &wait);
|
||||
|
||||
nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
|
||||
|
||||
if (nx == -EAGAIN && signal_pending(current)) {
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
|
||||
err = -ERESTART;
|
||||
goto error;
|
||||
}
|
||||
if (nx == -EAGAIN ||
|
||||
genid != atomic_read(&flow_cache_genid)) {
|
||||
xfrm_pols_put(pols, npols);
|
||||
goto restart;
|
||||
}
|
||||
err = nx;
|
||||
}
|
||||
if (err < 0) {
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
if (nx == 0) {
|
||||
/* Flow passes not transformed. */
|
||||
xfrm_pols_put(pols, npols);
|
||||
return 0;
|
||||
}
|
||||
|
||||
dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
|
||||
err = PTR_ERR(dst);
|
||||
if (IS_ERR(dst)) {
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
|
||||
goto error;
|
||||
}
|
||||
|
||||
for (pi = 0; pi < npols; pi++)
|
||||
pol_dead |= pols[pi]->walk.dead;
|
||||
|
||||
write_lock_bh(&policy->lock);
|
||||
if (unlikely(pol_dead || stale_bundle(dst))) {
|
||||
/* Wow! While we worked on resolving, this
|
||||
* policy has gone. Retry. It is not paranoia,
|
||||
* we just cannot enlist new bundle to dead object.
|
||||
* We can't enlist stale bundles either.
|
||||
*/
|
||||
write_unlock_bh(&policy->lock);
|
||||
dst_free(dst);
|
||||
|
||||
if (pol_dead)
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
|
||||
else
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
|
||||
err = -EHOSTUNREACH;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (npols > 1)
|
||||
err = xfrm_dst_update_parent(dst, &pols[1]->selector);
|
||||
else
|
||||
err = xfrm_dst_update_origin(dst, fl);
|
||||
if (unlikely(err)) {
|
||||
write_unlock_bh(&policy->lock);
|
||||
dst_free(dst);
|
||||
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
|
||||
goto error;
|
||||
}
|
||||
|
||||
dst->next = policy->bundles;
|
||||
policy->bundles = dst;
|
||||
dst_hold(dst);
|
||||
write_unlock_bh(&policy->lock);
|
||||
} else if (num_xfrms > 0) {
|
||||
/* Flow transformed */
|
||||
*dst_p = dst;
|
||||
dst_release(dst_orig);
|
||||
} else {
|
||||
/* Flow passes untransformed */
|
||||
dst_release(dst);
|
||||
}
|
||||
*dst_p = dst;
|
||||
dst_release(dst_orig);
|
||||
xfrm_pols_put(pols, npols);
|
||||
ok:
|
||||
xfrm_pols_put(pols, drop_pols);
|
||||
return 0;
|
||||
|
||||
nopol:
|
||||
if (!(flags & XFRM_LOOKUP_ICMP))
|
||||
goto ok;
|
||||
err = -ENOENT;
|
||||
error:
|
||||
xfrm_pols_put(pols, npols);
|
||||
dst_release(dst);
|
||||
dropdst:
|
||||
dst_release(dst_orig);
|
||||
*dst_p = NULL;
|
||||
xfrm_pols_put(pols, drop_pols);
|
||||
return err;
|
||||
|
||||
nopol:
|
||||
err = -ENOENT;
|
||||
if (flags & XFRM_LOOKUP_ICMP)
|
||||
goto dropdst;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(__xfrm_lookup);
|
||||
|
||||
|
@ -2161,69 +2251,22 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
|
|||
return dst;
|
||||
}
|
||||
|
||||
static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
|
||||
{
|
||||
struct dst_entry *dst, **dstp;
|
||||
|
||||
write_lock(&pol->lock);
|
||||
dstp = &pol->bundles;
|
||||
while ((dst=*dstp) != NULL) {
|
||||
if (func(dst)) {
|
||||
*dstp = dst->next;
|
||||
dst->next = *gc_list_p;
|
||||
*gc_list_p = dst;
|
||||
} else {
|
||||
dstp = &dst->next;
|
||||
}
|
||||
}
|
||||
write_unlock(&pol->lock);
|
||||
}
|
||||
|
||||
static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
|
||||
{
|
||||
struct dst_entry *gc_list = NULL;
|
||||
int dir;
|
||||
|
||||
read_lock_bh(&xfrm_policy_lock);
|
||||
for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
|
||||
struct xfrm_policy *pol;
|
||||
struct hlist_node *entry;
|
||||
struct hlist_head *table;
|
||||
int i;
|
||||
|
||||
hlist_for_each_entry(pol, entry,
|
||||
&net->xfrm.policy_inexact[dir], bydst)
|
||||
prune_one_bundle(pol, func, &gc_list);
|
||||
|
||||
table = net->xfrm.policy_bydst[dir].table;
|
||||
for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
|
||||
hlist_for_each_entry(pol, entry, table + i, bydst)
|
||||
prune_one_bundle(pol, func, &gc_list);
|
||||
}
|
||||
}
|
||||
read_unlock_bh(&xfrm_policy_lock);
|
||||
|
||||
while (gc_list) {
|
||||
struct dst_entry *dst = gc_list;
|
||||
gc_list = dst->next;
|
||||
dst_free(dst);
|
||||
}
|
||||
}
|
||||
|
||||
static int unused_bundle(struct dst_entry *dst)
|
||||
{
|
||||
return !atomic_read(&dst->__refcnt);
|
||||
}
|
||||
|
||||
static void __xfrm_garbage_collect(struct net *net)
|
||||
{
|
||||
xfrm_prune_bundles(net, unused_bundle);
|
||||
}
|
||||
struct dst_entry *head, *next;
|
||||
|
||||
static int xfrm_flush_bundles(struct net *net)
|
||||
{
|
||||
xfrm_prune_bundles(net, stale_bundle);
|
||||
return 0;
|
||||
flow_cache_flush();
|
||||
|
||||
spin_lock_bh(&xfrm_policy_sk_bundle_lock);
|
||||
head = xfrm_policy_sk_bundles;
|
||||
xfrm_policy_sk_bundles = NULL;
|
||||
spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
|
||||
|
||||
while (head) {
|
||||
next = head->next;
|
||||
dst_free(head);
|
||||
head = next;
|
||||
}
|
||||
}
|
||||
|
||||
static void xfrm_init_pmtu(struct dst_entry *dst)
|
||||
|
@ -2283,7 +2326,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
|
|||
return 0;
|
||||
if (dst->xfrm->km.state != XFRM_STATE_VALID)
|
||||
return 0;
|
||||
if (xdst->genid != dst->xfrm->genid)
|
||||
if (xdst->xfrm_genid != dst->xfrm->genid)
|
||||
return 0;
|
||||
if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
|
||||
return 0;
|
||||
|
||||
if (strict && fl &&
|
||||
|
@ -2448,7 +2493,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
|
|||
|
||||
switch (event) {
|
||||
case NETDEV_DOWN:
|
||||
xfrm_flush_bundles(dev_net(dev));
|
||||
__xfrm_garbage_collect(dev_net(dev));
|
||||
}
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
@ -2780,7 +2825,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
|
|||
struct xfrm_migrate *m, int num_migrate)
|
||||
{
|
||||
struct xfrm_migrate *mp;
|
||||
struct dst_entry *dst;
|
||||
int i, j, n = 0;
|
||||
|
||||
write_lock_bh(&pol->lock);
|
||||
|
@ -2805,10 +2849,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
|
|||
sizeof(pol->xfrm_vec[i].saddr));
|
||||
pol->xfrm_vec[i].encap_family = mp->new_family;
|
||||
/* flush bundles */
|
||||
while ((dst = pol->bundles) != NULL) {
|
||||
pol->bundles = dst->next;
|
||||
dst_free(dst);
|
||||
}
|
||||
atomic_inc(&pol->genid);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue