22b6722bfa
Reflecting the IPv6 Flow Label at server nodes is useful in environments that employ multipath routing to load balance requests. As the "IPv6 Flow Label Reflection" standard draft [1] points out, ICMPv6 PTB error messages generated in response to downstream packets from the server can be routed by a load balancer back to the original server without looking at transport headers, if the server applies flow label reflection. This enables Path MTU Discovery past the ECMP router in load-balance or anycast environments where each server node is reachable by only one path. Introduce a sysctl to enable flow label reflection per net namespace for all newly created sockets. Previously, the same could be achieved only per socket, by setting the IPV6_FL_F_REFLECT flag for the IPV6_FLOWLABEL_MGR socket option. [1] https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01 Signed-off-by: Jakub Sitnicki <jkbs@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
248 lines
6 KiB
C
248 lines
6 KiB
C
/*
 * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
 *
 * Changes:
 * YOSHIFUJI Hideaki @USAGI:	added icmp sysctl table.
 */
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/in6.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/export.h>
|
|
#include <net/ndisc.h>
|
|
#include <net/ipv6.h>
|
|
#include <net/addrconf.h>
|
|
#include <net/inet_frag.h>
|
|
#ifdef CONFIG_NETLABEL
|
|
#include <net/calipso.h>
|
|
#endif
|
|
|
|
static int one = 1;
|
|
static int auto_flowlabels_min;
|
|
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
|
|
|
|
|
|
static struct ctl_table ipv6_table_template[] = {
|
|
{
|
|
.procname = "bindv6only",
|
|
.data = &init_net.ipv6.sysctl.bindv6only,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "anycast_src_echo_reply",
|
|
.data = &init_net.ipv6.sysctl.anycast_src_echo_reply,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "flowlabel_consistency",
|
|
.data = &init_net.ipv6.sysctl.flowlabel_consistency,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "auto_flowlabels",
|
|
.data = &init_net.ipv6.sysctl.auto_flowlabels,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = &auto_flowlabels_min,
|
|
.extra2 = &auto_flowlabels_max
|
|
},
|
|
{
|
|
.procname = "fwmark_reflect",
|
|
.data = &init_net.ipv6.sysctl.fwmark_reflect,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "idgen_retries",
|
|
.data = &init_net.ipv6.sysctl.idgen_retries,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
{
|
|
.procname = "idgen_delay",
|
|
.data = &init_net.ipv6.sysctl.idgen_delay,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_jiffies,
|
|
},
|
|
{
|
|
.procname = "flowlabel_state_ranges",
|
|
.data = &init_net.ipv6.sysctl.flowlabel_state_ranges,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "ip_nonlocal_bind",
|
|
.data = &init_net.ipv6.sysctl.ip_nonlocal_bind,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "flowlabel_reflect",
|
|
.data = &init_net.ipv6.sysctl.flowlabel_reflect,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
{ }
|
|
};
|
|
|
|
static struct ctl_table ipv6_rotable[] = {
|
|
{
|
|
.procname = "mld_max_msf",
|
|
.data = &sysctl_mld_max_msf,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
.procname = "mld_qrv",
|
|
.data = &sysctl_mld_qrv,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec_minmax,
|
|
.extra1 = &one
|
|
},
|
|
#ifdef CONFIG_NETLABEL
|
|
{
|
|
.procname = "calipso_cache_enable",
|
|
.data = &calipso_cache_enabled,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
{
|
|
.procname = "calipso_cache_bucket_size",
|
|
.data = &calipso_cache_bucketsize,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = proc_dointvec,
|
|
},
|
|
#endif /* CONFIG_NETLABEL */
|
|
{ }
|
|
};
|
|
|
|
/*
 * Per-namespace setup of the IPv6 sysctl tables.
 *
 * Duplicates ipv6_table_template, points each copy entry at the fields of
 * @net, obtains the route and icmp tables from their helpers, and registers
 * the three tables under "net/ipv6", "net/ipv6/route" and "net/ipv6/icmp".
 * The resulting headers are stored in net->ipv6.sysctl.
 *
 * Returns 0 on success.  Every failure path undoes what was done so far
 * and returns -ENOMEM.
 */
static int __net_init ipv6_sysctl_net_init(struct net *net)
{
	struct ctl_table *tbl;
	struct ctl_table *route_tbl;
	struct ctl_table *icmp_tbl;

	tbl = kmemdup(ipv6_table_template, sizeof(ipv6_table_template),
		      GFP_KERNEL);
	if (!tbl)
		return -ENOMEM;

	/*
	 * The copy still points at init_net; redirect each entry to this
	 * namespace.  Indices follow the entry order of ipv6_table_template.
	 */
	tbl[0].data = &net->ipv6.sysctl.bindv6only;
	tbl[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
	tbl[2].data = &net->ipv6.sysctl.flowlabel_consistency;
	tbl[3].data = &net->ipv6.sysctl.auto_flowlabels;
	tbl[4].data = &net->ipv6.sysctl.fwmark_reflect;
	tbl[5].data = &net->ipv6.sysctl.idgen_retries;
	tbl[6].data = &net->ipv6.sysctl.idgen_delay;
	tbl[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
	tbl[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
	tbl[9].data = &net->ipv6.sysctl.flowlabel_reflect;

	route_tbl = ipv6_route_sysctl_init(net);
	if (!route_tbl)
		goto free_tbl;

	icmp_tbl = ipv6_icmp_sysctl_init(net);
	if (!icmp_tbl)
		goto free_route_tbl;

	net->ipv6.sysctl.hdr = register_net_sysctl(net, "net/ipv6", tbl);
	if (!net->ipv6.sysctl.hdr)
		goto free_icmp_tbl;

	net->ipv6.sysctl.route_hdr =
		register_net_sysctl(net, "net/ipv6/route", route_tbl);
	if (!net->ipv6.sysctl.route_hdr)
		goto unregister_tbl;

	net->ipv6.sysctl.icmp_hdr =
		register_net_sysctl(net, "net/ipv6/icmp", icmp_tbl);
	if (!net->ipv6.sysctl.icmp_hdr)
		goto unregister_route_tbl;

	return 0;

	/* Unwind in reverse order of acquisition. */
unregister_route_tbl:
	unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
unregister_tbl:
	unregister_net_sysctl_table(net->ipv6.sysctl.hdr);
free_icmp_tbl:
	kfree(icmp_tbl);
free_route_tbl:
	kfree(route_tbl);
free_tbl:
	kfree(tbl);
	return -ENOMEM;
}
|
|
|
|
/*
 * Per-namespace teardown: unregister the three sysctl headers stored in
 * net->ipv6.sysctl and free the tables they were registered with.
 */
static void __net_exit ipv6_sysctl_net_exit(struct net *net)
{
	/*
	 * Fetch the table pointers from the headers up front, before any
	 * header is unregistered.
	 * NOTE(review): presumably a header must not be dereferenced once
	 * it has been unregistered - confirm against the sysctl core.
	 */
	struct ctl_table *tbl = net->ipv6.sysctl.hdr->ctl_table_arg;
	struct ctl_table *route_tbl = net->ipv6.sysctl.route_hdr->ctl_table_arg;
	struct ctl_table *icmp_tbl = net->ipv6.sysctl.icmp_hdr->ctl_table_arg;

	/* Reverse of the registration order used at init time. */
	unregister_net_sysctl_table(net->ipv6.sysctl.icmp_hdr);
	unregister_net_sysctl_table(net->ipv6.sysctl.route_hdr);
	unregister_net_sysctl_table(net->ipv6.sysctl.hdr);

	kfree(tbl);
	kfree(route_tbl);
	kfree(icmp_tbl);
}
|
|
|
|
static struct pernet_operations ipv6_sysctl_net_ops = {
|
|
.init = ipv6_sysctl_net_init,
|
|
.exit = ipv6_sysctl_net_exit,
|
|
};
|
|
|
|
static struct ctl_table_header *ip6_header;
|
|
|
|
int ipv6_sysctl_register(void)
|
|
{
|
|
int err = -ENOMEM;
|
|
|
|
ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable);
|
|
if (!ip6_header)
|
|
goto out;
|
|
|
|
err = register_pernet_subsys(&ipv6_sysctl_net_ops);
|
|
if (err)
|
|
goto err_pernet;
|
|
out:
|
|
return err;
|
|
|
|
err_pernet:
|
|
unregister_net_sysctl_table(ip6_header);
|
|
goto out;
|
|
}
|
|
|
|
void ipv6_sysctl_unregister(void)
|
|
{
|
|
unregister_net_sysctl_table(ip6_header);
|
|
unregister_pernet_subsys(&ipv6_sysctl_net_ops);
|
|
}
|