[NETNS]: Namespace stop vs 'ip r l' race.
During the network namespace stop process, kernel-side netlink sockets belonging to a namespace should be closed. They should not prevent the namespace from stopping, so they do not increment the namespace usage counter. However, this counter will still be put during the last sock_put. Replacing the correct netns with init_ns solves the problem only partially, because the socket being stopped remains a valid netlink kernel socket until it is properly stopped, and can be looked up by user processes. This is not a problem while the socket resides in its original namespace (there are no processes inside this net), but this is not true for init_net. So, hold a reference to the socket, remove it from the lookup tables, and only after that change the namespace and perform the last put. Signed-off-by: Denis V. Lunev <den@openvz.org> Tested-by: Alexey Dobriyan <adobriyan@openvz.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
b7c6ba6eb1
commit
775516bfa2
3 changed files with 18 additions and 19 deletions
|
@ -1368,25 +1368,14 @@ static int rtnetlink_net_init(struct net *net)
|
|||
rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
|
||||
if (!sk)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Don't hold an extra reference on the namespace */
|
||||
put_net(sk->sk_net);
|
||||
net->rtnl = sk;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rtnetlink_net_exit(struct net *net)
|
||||
{
|
||||
struct sock *sk = net->rtnl;
|
||||
if (sk) {
|
||||
/* At the last minute lie and say this is a socket for the
|
||||
* initial network namespace. So the socket will be safe to
|
||||
* free.
|
||||
*/
|
||||
sk->sk_net = get_net(&init_net);
|
||||
netlink_kernel_release(net->rtnl);
|
||||
net->rtnl = NULL;
|
||||
}
|
||||
netlink_kernel_release(net->rtnl);
|
||||
net->rtnl = NULL;
|
||||
}
|
||||
|
||||
static struct pernet_operations rtnetlink_net_ops = {
|
||||
|
|
|
@ -869,19 +869,14 @@ static int nl_fib_lookup_init(struct net *net)
|
|||
nl_fib_input, NULL, THIS_MODULE);
|
||||
if (sk == NULL)
|
||||
return -EAFNOSUPPORT;
|
||||
/* Don't hold an extra reference on the namespace */
|
||||
put_net(sk->sk_net);
|
||||
net->ipv4.fibnl = sk;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nl_fib_lookup_exit(struct net *net)
|
||||
{
|
||||
/* At the last minute lie and say this is a socket for the
|
||||
* initial network namespace. So the socket will be safe to free.
|
||||
*/
|
||||
net->ipv4.fibnl->sk_net = get_net(&init_net);
|
||||
netlink_kernel_release(net->ipv4.fibnl);
|
||||
net->ipv4.fibnl = NULL;
|
||||
}
|
||||
|
||||
static void fib_disable_ip(struct net_device *dev, int force)
|
||||
|
|
|
@ -1396,6 +1396,9 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
|
|||
}
|
||||
netlink_table_ungrab();
|
||||
|
||||
/* Do not hold an extra reference to a namespace as this socket is
|
||||
* internal to a namespace and does not prevent it from stopping. */
|
||||
put_net(net);
|
||||
return sk;
|
||||
|
||||
out_sock_release:
|
||||
|
@ -1411,7 +1414,19 @@ netlink_kernel_release(struct sock *sk)
|
|||
{
|
||||
if (sk == NULL || sk->sk_socket == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Last sock_put should drop reference to sk->sk_net. It has already
|
||||
* been dropped in netlink_kernel_create. Taking reference to stopping
|
||||
* namespace is not an option.
|
||||
* Take reference to a socket to remove it from netlink lookup table
|
||||
* _alive_ and after that destroy it in the context of init_net.
|
||||
*/
|
||||
sock_hold(sk);
|
||||
sock_release(sk->sk_socket);
|
||||
|
||||
sk->sk_net = get_net(&init_net);
|
||||
sock_put(sk);
|
||||
}
|
||||
EXPORT_SYMBOL(netlink_kernel_release);
|
||||
|
||||
|
|
Loading…
Reference in a new issue