ipvs: convert services to rcu

This is the final step in RCU conversion.

Things that are removed:

- svc->usecnt: now svc is accessed under RCU read lock
- svc->inc: and some unused code
- ip_vs_bind_pe and ip_vs_unbind_pe: no ability to replace PE
- __ip_vs_svc_lock: replaced with RCU
- IP_VS_WAIT_WHILE: now readers look up svcs and dests under
	RCU and work in parallel with configuration

Other changes:

- until now, an RCU read-side critical section included the
calling of the schedule method; now it is extended to include
service lookup
- ip_vs_svc_table and ip_vs_svc_fwm_table are now using hlist
- svc->pe and svc->scheduler remain set until the end of the grace
	period. The schedulers are prepared for such RCU readers
	even after done_service is called, but they need
	to use synchronize_rcu because the last ip_vs_scheduler_put
	can happen while RCU read-side critical sections
	still use an outdated svc->scheduler pointer
- as planned, update_service is removed
- empty services can be freed immediately after a grace period.
	If dests were present, the services are freed from
	the dest trash code

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
This commit is contained in:
Julian Anastasov 2013-03-22 11:46:53 +02:00 committed by Pablo Neira Ayuso
parent 413c2d04e9
commit ceec4c3816
18 changed files with 186 additions and 259 deletions

View file

@ -359,8 +359,6 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
#define LeaveFunction(level) do {} while (0) #define LeaveFunction(level) do {} while (0)
#endif #endif
#define IP_VS_WAIT_WHILE(expr) while (expr) { cpu_relax(); }
/* /*
* The port number of FTP service (in network order). * The port number of FTP service (in network order).
@ -712,10 +710,9 @@ struct ip_vs_dest_user_kern {
* and the forwarding entries * and the forwarding entries
*/ */
struct ip_vs_service { struct ip_vs_service {
struct list_head s_list; /* for normal service table */ struct hlist_node s_list; /* for normal service table */
struct list_head f_list; /* for fwmark-based service table */ struct hlist_node f_list; /* for fwmark-based service table */
atomic_t refcnt; /* reference counter */ atomic_t refcnt; /* reference counter */
atomic_t usecnt; /* use counter */
u16 af; /* address family */ u16 af; /* address family */
__u16 protocol; /* which protocol (TCP/UDP) */ __u16 protocol; /* which protocol (TCP/UDP) */
@ -730,15 +727,16 @@ struct ip_vs_service {
struct list_head destinations; /* real server d-linked list */ struct list_head destinations; /* real server d-linked list */
__u32 num_dests; /* number of servers */ __u32 num_dests; /* number of servers */
struct ip_vs_stats stats; /* statistics for the service */ struct ip_vs_stats stats; /* statistics for the service */
struct ip_vs_app *inc; /* bind conns to this app inc */
/* for scheduling */ /* for scheduling */
struct ip_vs_scheduler *scheduler; /* bound scheduler object */ struct ip_vs_scheduler __rcu *scheduler; /* bound scheduler object */
spinlock_t sched_lock; /* lock sched_data */ spinlock_t sched_lock; /* lock sched_data */
void *sched_data; /* scheduler application data */ void *sched_data; /* scheduler application data */
/* alternate persistence engine */ /* alternate persistence engine */
struct ip_vs_pe *pe; struct ip_vs_pe __rcu *pe;
struct rcu_head rcu_head;
}; };
/* Information for cached dst */ /* Information for cached dst */
@ -807,8 +805,6 @@ struct ip_vs_scheduler {
int (*init_service)(struct ip_vs_service *svc); int (*init_service)(struct ip_vs_service *svc);
/* scheduling service finish */ /* scheduling service finish */
void (*done_service)(struct ip_vs_service *svc); void (*done_service)(struct ip_vs_service *svc);
/* scheduler updating service */
int (*update_service)(struct ip_vs_service *svc);
/* dest is linked */ /* dest is linked */
int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest); int (*add_dest)(struct ip_vs_service *svc, struct ip_vs_dest *dest);
/* dest is unlinked */ /* dest is unlinked */
@ -1344,8 +1340,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
void ip_vs_unbind_pe(struct ip_vs_service *svc);
int register_ip_vs_pe(struct ip_vs_pe *pe); int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe); int unregister_ip_vs_pe(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
@ -1392,7 +1386,8 @@ extern int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); extern int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler);
extern int ip_vs_bind_scheduler(struct ip_vs_service *svc, extern int ip_vs_bind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *scheduler); struct ip_vs_scheduler *scheduler);
extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *sched);
extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn * extern struct ip_vs_conn *
@ -1412,14 +1407,9 @@ extern struct ip_vs_stats ip_vs_stats;
extern int sysctl_ip_vs_sync_ver; extern int sysctl_ip_vs_sync_ver;
extern struct ip_vs_service * extern struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport); const union nf_inet_addr *vaddr, __be16 vport);
static inline void ip_vs_service_put(struct ip_vs_service *svc)
{
atomic_dec(&svc->usecnt);
}
extern bool extern bool
ip_vs_has_real_service(struct net *net, int af, __u16 protocol, ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport); const union nf_inet_addr *daddr, __be16 dport);

View file

@ -203,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
{ {
ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
vport, p); vport, p);
p->pe = svc->pe; p->pe = rcu_dereference(svc->pe);
if (p->pe && p->pe->fill_param) if (p->pe && p->pe->fill_param)
return p->pe->fill_param(p, skb); return p->pe->fill_param(p, skb);
@ -296,15 +296,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/* Check if a template already exists */ /* Check if a template already exists */
ct = ip_vs_ct_in_get(&param); ct = ip_vs_ct_in_get(&param);
if (!ct || !ip_vs_check_template(ct)) { if (!ct || !ip_vs_check_template(ct)) {
struct ip_vs_scheduler *sched;
/* /*
* No template found or the dest of the connection * No template found or the dest of the connection
* template is not available. * template is not available.
* return *ignored=0 i.e. ICMP and NF_DROP * return *ignored=0 i.e. ICMP and NF_DROP
*/ */
rcu_read_lock(); sched = rcu_dereference(svc->scheduler);
dest = svc->scheduler->schedule(svc, skb); dest = sched->schedule(svc, skb);
if (!dest) { if (!dest) {
rcu_read_unlock();
IP_VS_DBG(1, "p-schedule: no dest found.\n"); IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data); kfree(param.pe_data);
*ignored = 0; *ignored = 0;
@ -320,7 +321,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* when the template expires */ * when the template expires */
ct = ip_vs_conn_new(&param, &dest->addr, dport, ct = ip_vs_conn_new(&param, &dest->addr, dport,
IP_VS_CONN_F_TEMPLATE, dest, skb->mark); IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
rcu_read_unlock();
if (ct == NULL) { if (ct == NULL) {
kfree(param.pe_data); kfree(param.pe_data);
*ignored = -1; *ignored = -1;
@ -394,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
{ {
struct ip_vs_protocol *pp = pd->pp; struct ip_vs_protocol *pp = pd->pp;
struct ip_vs_conn *cp = NULL; struct ip_vs_conn *cp = NULL;
struct ip_vs_scheduler *sched;
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
__be16 _ports[2], *pptr; __be16 _ports[2], *pptr;
unsigned int flags; unsigned int flags;
@ -449,10 +450,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return NULL; return NULL;
} }
rcu_read_lock(); sched = rcu_dereference(svc->scheduler);
dest = svc->scheduler->schedule(svc, skb); dest = sched->schedule(svc, skb);
if (dest == NULL) { if (dest == NULL) {
rcu_read_unlock();
IP_VS_DBG(1, "Schedule: no dest found.\n"); IP_VS_DBG(1, "Schedule: no dest found.\n");
return NULL; return NULL;
} }
@ -473,7 +473,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
cp = ip_vs_conn_new(&p, &dest->addr, cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1], dest->port ? dest->port : pptr[1],
flags, dest, skb->mark); flags, dest, skb->mark);
rcu_read_unlock();
if (!cp) { if (!cp) {
*ignored = -1; *ignored = -1;
return NULL; return NULL;
@ -510,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL) { if (pptr == NULL) {
ip_vs_service_put(svc);
return NF_DROP; return NF_DROP;
} }
@ -536,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_CONN_F_ONE_PACKET : 0; IP_VS_CONN_F_ONE_PACKET : 0;
union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
ip_vs_service_put(svc);
/* create a new connection entry */ /* create a new connection entry */
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{ {
@ -574,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* listed in the ipvs table), pass the packets, because it is * listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets. * not ipvs job to decide to drop the packets.
*/ */
if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
ip_vs_service_put(svc);
return NF_ACCEPT; return NF_ACCEPT;
}
ip_vs_service_put(svc);
/* /*
* Notify the client that the destination is unreachable, and * Notify the client that the destination is unreachable, and

View file

@ -55,9 +55,6 @@
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex); static DEFINE_MUTEX(__ip_vs_mutex);
/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
/* sysctl variables */ /* sysctl variables */
#ifdef CONFIG_IP_VS_DEBUG #ifdef CONFIG_IP_VS_DEBUG
@ -257,9 +254,9 @@ ip_vs_use_count_dec(void)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
/* the service table hashed by <protocol, addr, port> */ /* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */ /* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
/* /*
@ -314,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
*/ */
hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
&svc->addr, svc->port); &svc->addr, svc->port);
list_add(&svc->s_list, &ip_vs_svc_table[hash]); hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
} else { } else {
/* /*
* Hash it by fwmark in svc_fwm_table * Hash it by fwmark in svc_fwm_table
*/ */
hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
} }
svc->flags |= IP_VS_SVC_F_HASHED; svc->flags |= IP_VS_SVC_F_HASHED;
@ -344,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
if (svc->fwmark == 0) { if (svc->fwmark == 0) {
/* Remove it from the svc_table table */ /* Remove it from the svc_table table */
list_del(&svc->s_list); hlist_del_rcu(&svc->s_list);
} else { } else {
/* Remove it from the svc_fwm_table table */ /* Remove it from the svc_fwm_table table */
list_del(&svc->f_list); hlist_del_rcu(&svc->f_list);
} }
svc->flags &= ~IP_VS_SVC_F_HASHED; svc->flags &= ~IP_VS_SVC_F_HASHED;
@ -369,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
/* Check for "full" addressed entries */ /* Check for "full" addressed entries */
hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
if ((svc->af == af) if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr) && ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport) && (svc->port == vport)
@ -396,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
/* Check for fwmark addressed entries */ /* Check for fwmark addressed entries */
hash = ip_vs_svc_fwm_hashkey(net, fwmark); hash = ip_vs_svc_fwm_hashkey(net, fwmark);
list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
if (svc->fwmark == fwmark && svc->af == af if (svc->fwmark == fwmark && svc->af == af
&& net_eq(svc->net, net)) { && net_eq(svc->net, net)) {
/* HIT */ /* HIT */
@ -407,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
return NULL; return NULL;
} }
/* Find service, called under RCU lock */
struct ip_vs_service * struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport) const union nf_inet_addr *vaddr, __be16 vport)
{ {
struct ip_vs_service *svc; struct ip_vs_service *svc;
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
read_lock(&__ip_vs_svc_lock);
/* /*
* Check the table hashed by fwmark first * Check the table hashed by fwmark first
*/ */
@ -451,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
} }
out: out:
if (svc)
atomic_inc(&svc->usecnt);
read_unlock(&__ip_vs_svc_lock);
IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
fwmark, ip_vs_proto_name(protocol), fwmark, ip_vs_proto_name(protocol),
IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
@ -471,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
dest->svc = svc; dest->svc = svc;
} }
static void ip_vs_service_free(struct ip_vs_service *svc)
{
if (svc->stats.cpustats)
free_percpu(svc->stats.cpustats);
kfree(svc);
}
static void static void
__ip_vs_unbind_svc(struct ip_vs_dest *dest) __ip_vs_unbind_svc(struct ip_vs_dest *dest)
{ {
@ -478,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
dest->svc = NULL; dest->svc = NULL;
if (atomic_dec_and_test(&svc->refcnt)) { if (atomic_dec_and_test(&svc->refcnt)) {
IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
svc->fwmark, svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr), IP_VS_DBG_ADDR(svc->af, &svc->addr),
ntohs(svc->port), atomic_read(&svc->usecnt)); ntohs(svc->port));
free_percpu(svc->stats.cpustats); ip_vs_service_free(svc);
kfree(svc);
} }
} }
@ -608,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
struct ip_vs_service *svc; struct ip_vs_service *svc;
__be16 port = dport; __be16 port = dport;
svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
if (!svc) if (!svc)
return NULL; return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@ -616,7 +614,6 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
dest = ip_vs_lookup_dest(svc, daddr, port); dest = ip_vs_lookup_dest(svc, daddr, port);
if (!dest) if (!dest)
dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
ip_vs_service_put(svc);
return dest; return dest;
} }
@ -774,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add) struct ip_vs_dest_user_kern *udest, int add)
{ {
struct netns_ipvs *ipvs = net_ipvs(svc->net); struct netns_ipvs *ipvs = net_ipvs(svc->net);
struct ip_vs_scheduler *sched;
int conn_flags; int conn_flags;
/* set the weight and the flags */ /* set the weight and the flags */
@ -816,29 +814,17 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
__ip_vs_dst_cache_reset(dest); __ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock); spin_unlock_bh(&dest->dst_lock);
if (add) sched = rcu_dereference_protected(svc->scheduler, 1);
ip_vs_start_estimator(svc->net, &dest->stats);
write_lock_bh(&__ip_vs_svc_lock);
/* Wait until all other svc users go away */
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
if (add) { if (add) {
ip_vs_start_estimator(svc->net, &dest->stats);
list_add_rcu(&dest->n_list, &svc->destinations); list_add_rcu(&dest->n_list, &svc->destinations);
svc->num_dests++; svc->num_dests++;
if (svc->scheduler->add_dest) if (sched->add_dest)
svc->scheduler->add_dest(svc, dest); sched->add_dest(svc, dest);
} else { } else {
if (svc->scheduler->upd_dest) if (sched->upd_dest)
svc->scheduler->upd_dest(svc, dest); sched->upd_dest(svc, dest);
} }
/* call the update_service, because server weight may be changed */
if (svc->scheduler->update_service)
svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
} }
@ -1071,14 +1057,13 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
list_del_rcu(&dest->n_list); list_del_rcu(&dest->n_list);
svc->num_dests--; svc->num_dests--;
if (svcupd && svc->scheduler->del_dest) if (svcupd) {
svc->scheduler->del_dest(svc, dest); struct ip_vs_scheduler *sched;
/* sched = rcu_dereference_protected(svc->scheduler, 1);
* Call the update_service function of its scheduler if (sched->del_dest)
*/ sched->del_dest(svc, dest);
if (svcupd && svc->scheduler->update_service) }
svc->scheduler->update_service(svc);
} }
@ -1103,20 +1088,11 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return -ENOENT; return -ENOENT;
} }
write_lock_bh(&__ip_vs_svc_lock);
/*
* Wait until all other svc users go away.
*/
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
/* /*
* Unlink dest from the service * Unlink dest from the service
*/ */
__ip_vs_unlink_dest(svc, dest, 1); __ip_vs_unlink_dest(svc, dest, 1);
write_unlock_bh(&__ip_vs_svc_lock);
/* /*
* Delete the destination * Delete the destination
*/ */
@ -1207,7 +1183,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
} }
/* I'm the first user of the service */ /* I'm the first user of the service */
atomic_set(&svc->usecnt, 0);
atomic_set(&svc->refcnt, 0); atomic_set(&svc->refcnt, 0);
svc->af = u->af; svc->af = u->af;
@ -1231,7 +1206,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
sched = NULL; sched = NULL;
/* Bind the ct retriever */ /* Bind the ct retriever */
ip_vs_bind_pe(svc, pe); RCU_INIT_POINTER(svc->pe, pe);
pe = NULL; pe = NULL;
/* Update the virtual service counters */ /* Update the virtual service counters */
@ -1247,9 +1222,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
ipvs->num_services++; ipvs->num_services++;
/* Hash the service into the service table */ /* Hash the service into the service table */
write_lock_bh(&__ip_vs_svc_lock);
ip_vs_svc_hash(svc); ip_vs_svc_hash(svc);
write_unlock_bh(&__ip_vs_svc_lock);
*svc_p = svc; *svc_p = svc;
/* Now there is a service - full throttle */ /* Now there is a service - full throttle */
@ -1259,15 +1232,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
out_err: out_err:
if (svc != NULL) { if (svc != NULL) {
ip_vs_unbind_scheduler(svc); ip_vs_unbind_scheduler(svc, sched);
if (svc->inc) { ip_vs_service_free(svc);
local_bh_disable();
ip_vs_app_inc_put(svc->inc);
local_bh_enable();
}
if (svc->stats.cpustats)
free_percpu(svc->stats.cpustats);
kfree(svc);
} }
ip_vs_scheduler_put(sched); ip_vs_scheduler_put(sched);
ip_vs_pe_put(pe); ip_vs_pe_put(pe);
@ -1317,12 +1283,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
} }
#endif #endif
write_lock_bh(&__ip_vs_svc_lock); old_sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched != old_sched) {
/* /* Bind the new scheduler */
* Wait until all other svc users go away. ret = ip_vs_bind_scheduler(svc, sched);
*/ if (ret) {
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); old_sched = sched;
goto out;
}
/* Unbind the old scheduler on success */
ip_vs_unbind_scheduler(svc, old_sched);
}
/* /*
* Set the flags and timeout value * Set the flags and timeout value
@ -1331,47 +1302,23 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
svc->timeout = u->timeout * HZ; svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask; svc->netmask = u->netmask;
old_sched = svc->scheduler; old_pe = rcu_dereference_protected(svc->pe, 1);
if (sched != old_sched) { if (pe != old_pe)
/* rcu_assign_pointer(svc->pe, pe);
* Unbind the old scheduler
*/
ip_vs_unbind_scheduler(svc);
/*
* Bind the new scheduler
*/
if ((ret = ip_vs_bind_scheduler(svc, sched))) {
/*
* If ip_vs_bind_scheduler fails, restore the old
* scheduler.
* The main reason of failure is out of memory.
*
* The question is if the old scheduler can be
* restored all the time. TODO: if it cannot be
* restored some time, we must delete the service,
* otherwise the system may crash.
*/
ip_vs_bind_scheduler(svc, old_sched);
old_sched = sched;
goto out_unlock;
}
}
old_pe = svc->pe;
if (pe != old_pe) {
ip_vs_unbind_pe(svc);
ip_vs_bind_pe(svc, pe);
}
out_unlock:
write_unlock_bh(&__ip_vs_svc_lock);
out: out:
ip_vs_scheduler_put(old_sched); ip_vs_scheduler_put(old_sched);
ip_vs_pe_put(old_pe); ip_vs_pe_put(old_pe);
return ret; return ret;
} }
static void ip_vs_service_rcu_free(struct rcu_head *head)
{
struct ip_vs_service *svc;
svc = container_of(head, struct ip_vs_service, rcu_head);
ip_vs_service_free(svc);
}
/* /*
* Delete a service from the service list * Delete a service from the service list
@ -1394,21 +1341,14 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
ip_vs_stop_estimator(svc->net, &svc->stats); ip_vs_stop_estimator(svc->net, &svc->stats);
/* Unbind scheduler */ /* Unbind scheduler */
old_sched = svc->scheduler; old_sched = rcu_dereference_protected(svc->scheduler, 1);
ip_vs_unbind_scheduler(svc); ip_vs_unbind_scheduler(svc, old_sched);
ip_vs_scheduler_put(old_sched); ip_vs_scheduler_put(old_sched);
/* Unbind persistence engine */ /* Unbind persistence engine, keep svc->pe */
old_pe = svc->pe; old_pe = rcu_dereference_protected(svc->pe, 1);
ip_vs_unbind_pe(svc);
ip_vs_pe_put(old_pe); ip_vs_pe_put(old_pe);
/* Unbind app inc */
if (svc->inc) {
ip_vs_app_inc_put(svc->inc);
svc->inc = NULL;
}
/* /*
* Unlink the whole destination list * Unlink the whole destination list
*/ */
@ -1428,13 +1368,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
/* /*
* Free the service if nobody refers to it * Free the service if nobody refers to it
*/ */
if (atomic_read(&svc->refcnt) == 0) { if (atomic_dec_and_test(&svc->refcnt)) {
IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
svc->fwmark, svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr), IP_VS_DBG_ADDR(svc->af, &svc->addr),
ntohs(svc->port), atomic_read(&svc->usecnt)); ntohs(svc->port));
free_percpu(svc->stats.cpustats); call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
kfree(svc);
} }
/* decrease the module use count */ /* decrease the module use count */
@ -1446,21 +1385,14 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
*/ */
static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
{ {
/* Hold svc to avoid double release from dest_trash */
atomic_inc(&svc->refcnt);
/* /*
* Unhash it from the service table * Unhash it from the service table
*/ */
write_lock_bh(&__ip_vs_svc_lock);
ip_vs_svc_unhash(svc); ip_vs_svc_unhash(svc);
/*
* Wait until all the svc users go away.
*/
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
__ip_vs_del_service(svc, cleanup); __ip_vs_del_service(svc, cleanup);
write_unlock_bh(&__ip_vs_svc_lock);
} }
/* /*
@ -1482,14 +1414,15 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
static int ip_vs_flush(struct net *net, bool cleanup) static int ip_vs_flush(struct net *net, bool cleanup)
{ {
int idx; int idx;
struct ip_vs_service *svc, *nxt; struct ip_vs_service *svc;
struct hlist_node *n;
/* /*
* Flush the service table hashed by <netns,protocol,addr,port> * Flush the service table hashed by <netns,protocol,addr,port>
*/ */
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
s_list) { s_list) {
if (net_eq(svc->net, net)) if (net_eq(svc->net, net))
ip_vs_unlink_service(svc, cleanup); ip_vs_unlink_service(svc, cleanup);
} }
@ -1499,8 +1432,8 @@ static int ip_vs_flush(struct net *net, bool cleanup)
* Flush the service table hashed by fwmark * Flush the service table hashed by fwmark
*/ */
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry_safe(svc, nxt, hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
&ip_vs_svc_fwm_table[idx], f_list) { f_list) {
if (net_eq(svc->net, net)) if (net_eq(svc->net, net))
ip_vs_unlink_service(svc, cleanup); ip_vs_unlink_service(svc, cleanup);
} }
@ -1558,7 +1491,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
EnterFunction(2); EnterFunction(2);
mutex_lock(&__ip_vs_mutex); mutex_lock(&__ip_vs_mutex);
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
if (net_eq(svc->net, net)) { if (net_eq(svc->net, net)) {
list_for_each_entry(dest, &svc->destinations, list_for_each_entry(dest, &svc->destinations,
n_list) { n_list) {
@ -1567,7 +1500,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
} }
} }
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
if (net_eq(svc->net, net)) { if (net_eq(svc->net, net)) {
list_for_each_entry(dest, &svc->destinations, list_for_each_entry(dest, &svc->destinations,
n_list) { n_list) {
@ -1595,12 +1528,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
{ {
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
write_lock_bh(&__ip_vs_svc_lock);
list_for_each_entry(dest, &svc->destinations, n_list) { list_for_each_entry(dest, &svc->destinations, n_list) {
ip_vs_zero_stats(&dest->stats); ip_vs_zero_stats(&dest->stats);
} }
ip_vs_zero_stats(&svc->stats); ip_vs_zero_stats(&svc->stats);
write_unlock_bh(&__ip_vs_svc_lock);
return 0; return 0;
} }
@ -1610,14 +1541,14 @@ static int ip_vs_zero_all(struct net *net)
struct ip_vs_service *svc; struct ip_vs_service *svc;
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
if (net_eq(svc->net, net)) if (net_eq(svc->net, net))
ip_vs_zero_service(svc); ip_vs_zero_service(svc);
} }
} }
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
if (net_eq(svc->net, net)) if (net_eq(svc->net, net))
ip_vs_zero_service(svc); ip_vs_zero_service(svc);
} }
@ -1945,7 +1876,7 @@ static struct ctl_table vs_vars[] = {
struct ip_vs_iter { struct ip_vs_iter {
struct seq_net_private p; /* Do not move this, netns depends upon it*/ struct seq_net_private p; /* Do not move this, netns depends upon it*/
struct list_head *table; struct hlist_head *table;
int bucket; int bucket;
}; };
@ -1978,7 +1909,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* look in hash by protocol */ /* look in hash by protocol */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
if (net_eq(svc->net, net) && pos-- == 0) { if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_table; iter->table = ip_vs_svc_table;
iter->bucket = idx; iter->bucket = idx;
@ -1989,7 +1920,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* keep looking in fwmark */ /* keep looking in fwmark */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
f_list) {
if (net_eq(svc->net, net) && pos-- == 0) { if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_fwm_table; iter->table = ip_vs_svc_fwm_table;
iter->bucket = idx; iter->bucket = idx;
@ -2002,17 +1934,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
} }
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(__ip_vs_svc_lock)
{ {
read_lock_bh(&__ip_vs_svc_lock); rcu_read_lock();
return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
} }
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{ {
struct list_head *e; struct hlist_node *e;
struct ip_vs_iter *iter; struct ip_vs_iter *iter;
struct ip_vs_service *svc; struct ip_vs_service *svc;
@ -2025,13 +1956,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (iter->table == ip_vs_svc_table) { if (iter->table == ip_vs_svc_table) {
/* next service in table hashed by protocol */ /* next service in table hashed by protocol */
if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) e = rcu_dereference(hlist_next_rcu(&svc->s_list));
return list_entry(e, struct ip_vs_service, s_list); if (e)
return hlist_entry(e, struct ip_vs_service, s_list);
while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], hlist_for_each_entry_rcu(svc,
s_list) { &ip_vs_svc_table[iter->bucket],
s_list) {
return svc; return svc;
} }
} }
@ -2042,13 +1974,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
} }
/* next service in hashed by fwmark */ /* next service in hashed by fwmark */
if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) e = rcu_dereference(hlist_next_rcu(&svc->f_list));
return list_entry(e, struct ip_vs_service, f_list); if (e)
return hlist_entry(e, struct ip_vs_service, f_list);
scan_fwmark: scan_fwmark:
while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], hlist_for_each_entry_rcu(svc,
f_list) &ip_vs_svc_fwm_table[iter->bucket],
f_list)
return svc; return svc;
} }
@ -2056,9 +1990,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
} }
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{ {
read_unlock_bh(&__ip_vs_svc_lock); rcu_read_unlock();
} }
@ -2076,6 +2009,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_service *svc = v; const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private; const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest; const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
if (iter->table == ip_vs_svc_table) { if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
@ -2084,18 +2018,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
ip_vs_proto_name(svc->protocol), ip_vs_proto_name(svc->protocol),
&svc->addr.in6, &svc->addr.in6,
ntohs(svc->port), ntohs(svc->port),
svc->scheduler->name); sched->name);
else else
#endif #endif
seq_printf(seq, "%s %08X:%04X %s %s ", seq_printf(seq, "%s %08X:%04X %s %s ",
ip_vs_proto_name(svc->protocol), ip_vs_proto_name(svc->protocol),
ntohl(svc->addr.ip), ntohl(svc->addr.ip),
ntohs(svc->port), ntohs(svc->port),
svc->scheduler->name, sched->name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
} else { } else {
seq_printf(seq, "FWM %08X %s %s", seq_printf(seq, "FWM %08X %s %s",
svc->fwmark, svc->scheduler->name, svc->fwmark, sched->name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
} }
@ -2451,11 +2385,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
} }
/* Lookup the exact service by <protocol, addr, port> or fwmark */ /* Lookup the exact service by <protocol, addr, port> or fwmark */
rcu_read_lock();
if (usvc.fwmark == 0) if (usvc.fwmark == 0)
svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
&usvc.addr, usvc.port); &usvc.addr, usvc.port);
else else
svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
rcu_read_unlock();
if (cmd != IP_VS_SO_SET_ADD if (cmd != IP_VS_SO_SET_ADD
&& (svc == NULL || svc->protocol != usvc.protocol)) { && (svc == NULL || svc->protocol != usvc.protocol)) {
@ -2507,11 +2443,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
static void static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{ {
struct ip_vs_scheduler *sched;
sched = rcu_dereference_protected(src->scheduler, 1);
dst->protocol = src->protocol; dst->protocol = src->protocol;
dst->addr = src->addr.ip; dst->addr = src->addr.ip;
dst->port = src->port; dst->port = src->port;
dst->fwmark = src->fwmark; dst->fwmark = src->fwmark;
strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
dst->flags = src->flags; dst->flags = src->flags;
dst->timeout = src->timeout / HZ; dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask; dst->netmask = src->netmask;
@ -2530,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net,
int ret = 0; int ret = 0;
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */ /* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET || !net_eq(svc->net, net)) if (svc->af != AF_INET || !net_eq(svc->net, net))
continue; continue;
@ -2549,7 +2488,7 @@ __ip_vs_get_service_entries(struct net *net,
} }
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */ /* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET || !net_eq(svc->net, net)) if (svc->af != AF_INET || !net_eq(svc->net, net))
continue; continue;
@ -2578,11 +2517,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
union nf_inet_addr addr = { .ip = get->addr }; union nf_inet_addr addr = { .ip = get->addr };
int ret = 0; int ret = 0;
rcu_read_lock();
if (get->fwmark) if (get->fwmark)
svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
else else
svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
get->port); get->port);
rcu_read_unlock();
if (svc) { if (svc) {
int count = 0; int count = 0;
@ -2765,12 +2706,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
entry = (struct ip_vs_service_entry *)arg; entry = (struct ip_vs_service_entry *)arg;
addr.ip = entry->addr; addr.ip = entry->addr;
rcu_read_lock();
if (entry->fwmark) if (entry->fwmark)
svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
else else
svc = __ip_vs_service_find(net, AF_INET, svc = __ip_vs_service_find(net, AF_INET,
entry->protocol, &addr, entry->protocol, &addr,
entry->port); entry->port);
rcu_read_unlock();
if (svc) { if (svc) {
ip_vs_copy_service(entry, svc); ip_vs_copy_service(entry, svc);
if (copy_to_user(user, entry, sizeof(*entry)) != 0) if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@ -2927,6 +2870,7 @@ nla_put_failure:
static int ip_vs_genl_fill_service(struct sk_buff *skb, static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct ip_vs_service *svc) struct ip_vs_service *svc)
{ {
struct ip_vs_scheduler *sched;
struct nlattr *nl_service; struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags, struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 }; .mask = ~0 };
@ -2947,7 +2891,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
goto nla_put_failure; goto nla_put_failure;
} }
if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || sched = rcu_dereference_protected(svc->scheduler, 1);
if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
(svc->pe && (svc->pe &&
nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
@ -2998,7 +2943,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
mutex_lock(&__ip_vs_mutex); mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
if (++idx <= start || !net_eq(svc->net, net)) if (++idx <= start || !net_eq(svc->net, net))
continue; continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@ -3009,7 +2954,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
} }
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
if (++idx <= start || !net_eq(svc->net, net)) if (++idx <= start || !net_eq(svc->net, net))
continue; continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@ -3069,11 +3014,13 @@ static int ip_vs_genl_parse_service(struct net *net,
usvc->fwmark = 0; usvc->fwmark = 0;
} }
rcu_read_lock();
if (usvc->fwmark) if (usvc->fwmark)
svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
else else
svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
&usvc->addr, usvc->port); &usvc->addr, usvc->port);
rcu_read_unlock();
*ret_svc = svc; *ret_svc = svc;
/* If a full entry was requested, check for the additional fields */ /* If a full entry was requested, check for the additional fields */
@ -3905,8 +3852,8 @@ int __init ip_vs_control_init(void)
/* Initialize svc_table, ip_vs_svc_fwm_table */ /* Initialize svc_table, ip_vs_svc_fwm_table */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
INIT_LIST_HEAD(&ip_vs_svc_table[idx]); INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
} }
smp_wmb(); /* Do we really need it now ? */ smp_wmb(); /* Do we really need it now ? */

View file

@ -269,6 +269,7 @@ static int __init ip_vs_dh_init(void)
static void __exit ip_vs_dh_cleanup(void) static void __exit ip_vs_dh_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
synchronize_rcu();
} }

View file

@ -633,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
unregister_pernet_subsys(&ip_vs_lblc_ops); unregister_pernet_subsys(&ip_vs_lblc_ops);
synchronize_rcu();
} }

View file

@ -821,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
unregister_pernet_subsys(&ip_vs_lblcr_ops); unregister_pernet_subsys(&ip_vs_lblcr_ops);
synchronize_rcu();
} }

View file

@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void)
static void __exit ip_vs_lc_cleanup(void) static void __exit ip_vs_lc_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
synchronize_rcu();
} }
module_init(ip_vs_lc_init); module_init(ip_vs_lc_init);

View file

@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void)
static void __exit ip_vs_nq_cleanup(void) static void __exit ip_vs_nq_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
synchronize_rcu();
} }
module_init(ip_vs_nq_init); module_init(ip_vs_nq_init);

View file

@ -16,18 +16,6 @@ static LIST_HEAD(ip_vs_pe);
/* semaphore for IPVS PEs. */ /* semaphore for IPVS PEs. */
static DEFINE_MUTEX(ip_vs_pe_mutex); static DEFINE_MUTEX(ip_vs_pe_mutex);
/* Bind a service with a pe */
void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
{
svc->pe = pe;
}
/* Unbind a service from its pe */
void ip_vs_unbind_pe(struct ip_vs_service *svc)
{
svc->pe = NULL;
}
/* Get pe in the pe list by name */ /* Get pe in the pe list by name */
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
{ {

View file

@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (sch == NULL) if (sch == NULL)
return 0; return 0;
net = skb_net(skb); net = skb_net(skb);
rcu_read_lock();
if ((sch->type == SCTP_CID_INIT) && if ((sch->type == SCTP_CID_INIT) &&
(svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, sh->dest))) { &iph->daddr, sh->dest))) {
int ignored; int ignored;
if (ip_vs_todrop(net_ipvs(net))) { if (ip_vs_todrop(net_ipvs(net))) {
@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded. * It seems that we are very loaded.
* We have to drop this packet :( * We have to drop this packet :(
*/ */
ip_vs_service_put(svc); rcu_read_unlock();
*verdict = NF_DROP; *verdict = NF_DROP;
return 0; return 0;
} }
@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) { if (!*cpp && ignored <= 0) {
if (!ignored) if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph); *verdict = ip_vs_leave(svc, skb, pd, iph);
else { else
ip_vs_service_put(svc);
*verdict = NF_DROP; *verdict = NF_DROP;
} rcu_read_unlock();
return 0; return 0;
} }
ip_vs_service_put(svc);
} }
rcu_read_unlock();
/* NF_ACCEPT */ /* NF_ACCEPT */
return 1; return 1;
} }

View file

@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
} }
net = skb_net(skb); net = skb_net(skb);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
rcu_read_lock();
if (th->syn && if (th->syn &&
(svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, th->dest))) { &iph->daddr, th->dest))) {
int ignored; int ignored;
if (ip_vs_todrop(net_ipvs(net))) { if (ip_vs_todrop(net_ipvs(net))) {
@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded. * It seems that we are very loaded.
* We have to drop this packet :( * We have to drop this packet :(
*/ */
ip_vs_service_put(svc); rcu_read_unlock();
*verdict = NF_DROP; *verdict = NF_DROP;
return 0; return 0;
} }
@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) { if (!*cpp && ignored <= 0) {
if (!ignored) if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph); *verdict = ip_vs_leave(svc, skb, pd, iph);
else { else
ip_vs_service_put(svc);
*verdict = NF_DROP; *verdict = NF_DROP;
} rcu_read_unlock();
return 0; return 0;
} }
ip_vs_service_put(svc);
} }
rcu_read_unlock();
/* NF_ACCEPT */ /* NF_ACCEPT */
return 1; return 1;
} }

View file

@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
return 0; return 0;
} }
net = skb_net(skb); net = skb_net(skb);
svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, rcu_read_lock();
&iph->daddr, uh->dest); svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
&iph->daddr, uh->dest);
if (svc) { if (svc) {
int ignored; int ignored;
@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded. * It seems that we are very loaded.
* We have to drop this packet :( * We have to drop this packet :(
*/ */
ip_vs_service_put(svc); rcu_read_unlock();
*verdict = NF_DROP; *verdict = NF_DROP;
return 0; return 0;
} }
@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) { if (!*cpp && ignored <= 0) {
if (!ignored) if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph); *verdict = ip_vs_leave(svc, skb, pd, iph);
else { else
ip_vs_service_put(svc);
*verdict = NF_DROP; *verdict = NF_DROP;
} rcu_read_unlock();
return 0; return 0;
} }
ip_vs_service_put(svc);
} }
rcu_read_unlock();
/* NF_ACCEPT */ /* NF_ACCEPT */
return 1; return 1;
} }

View file

@ -121,6 +121,7 @@ static int __init ip_vs_rr_init(void)
static void __exit ip_vs_rr_cleanup(void) static void __exit ip_vs_rr_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
synchronize_rcu();
} }
module_init(ip_vs_rr_init); module_init(ip_vs_rr_init);

View file

@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
{ {
int ret; int ret;
svc->scheduler = scheduler;
if (scheduler->init_service) { if (scheduler->init_service) {
ret = scheduler->init_service(svc); ret = scheduler->init_service(svc);
if (ret) { if (ret) {
@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
return ret; return ret;
} }
} }
rcu_assign_pointer(svc->scheduler, scheduler);
return 0; return 0;
} }
@ -64,17 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
/* /*
* Unbind a service with its scheduler * Unbind a service with its scheduler
*/ */
void ip_vs_unbind_scheduler(struct ip_vs_service *svc) void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
struct ip_vs_scheduler *sched)
{ {
struct ip_vs_scheduler *sched = svc->scheduler; struct ip_vs_scheduler *cur_sched;
if (!sched) cur_sched = rcu_dereference_protected(svc->scheduler, 1);
/* This check proves that old 'sched' was installed */
if (!cur_sched)
return; return;
if (sched->done_service) if (sched->done_service)
sched->done_service(svc); sched->done_service(svc);
/* svc->scheduler can not be set to NULL */
svc->scheduler = NULL;
} }
@ -148,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
{ {
struct ip_vs_scheduler *sched;
sched = rcu_dereference(svc->scheduler);
if (svc->fwmark) { if (svc->fwmark) {
IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
svc->scheduler->name, svc->fwmark, sched->name, svc->fwmark, svc->fwmark, msg);
svc->fwmark, msg);
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
} else if (svc->af == AF_INET6) { } else if (svc->af == AF_INET6) {
IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
svc->scheduler->name, sched->name, ip_vs_proto_name(svc->protocol),
ip_vs_proto_name(svc->protocol),
&svc->addr.in6, ntohs(svc->port), msg); &svc->addr.in6, ntohs(svc->port), msg);
#endif #endif
} else { } else {
IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
svc->scheduler->name, sched->name, ip_vs_proto_name(svc->protocol),
ip_vs_proto_name(svc->protocol),
&svc->addr.ip, ntohs(svc->port), msg); &svc->addr.ip, ntohs(svc->port), msg);
} }
} }

View file

@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void)
static void __exit ip_vs_sed_cleanup(void) static void __exit ip_vs_sed_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
synchronize_rcu();
} }
module_init(ip_vs_sed_init); module_init(ip_vs_sed_init);

View file

@ -283,6 +283,7 @@ static int __init ip_vs_sh_init(void)
static void __exit ip_vs_sh_cleanup(void) static void __exit ip_vs_sh_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
synchronize_rcu();
} }

View file

@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void)
static void __exit ip_vs_wlc_cleanup(void) static void __exit ip_vs_wlc_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
synchronize_rcu();
} }
module_init(ip_vs_wlc_init); module_init(ip_vs_wlc_init);

View file

@ -261,6 +261,7 @@ static int __init ip_vs_wrr_init(void)
static void __exit ip_vs_wrr_cleanup(void) static void __exit ip_vs_wrr_cleanup(void)
{ {
unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
synchronize_rcu();
} }
module_init(ip_vs_wrr_init); module_init(ip_vs_wrr_init);