[NETFILTER]: nfnetlink: use RCU for queue instances hash
Use RCU for queue instances hash. Avoids multiple atomic operations for each packet. Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
a3c8e7fd4b
commit
9872bec773
1 changed files with 93 additions and 114 deletions
|
@ -47,7 +47,7 @@
|
|||
|
||||
struct nfqnl_instance {
|
||||
struct hlist_node hlist; /* global list of queues */
|
||||
atomic_t use;
|
||||
struct rcu_head rcu;
|
||||
|
||||
int peer_pid;
|
||||
unsigned int queue_maxlen;
|
||||
|
@ -68,7 +68,7 @@ struct nfqnl_instance {
|
|||
|
||||
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
|
||||
|
||||
static DEFINE_RWLOCK(instances_lock);
|
||||
static DEFINE_SPINLOCK(instances_lock);
|
||||
|
||||
#define INSTANCE_BUCKETS 16
|
||||
static struct hlist_head instance_table[INSTANCE_BUCKETS];
|
||||
|
@ -79,52 +79,30 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
|
|||
}
|
||||
|
||||
static struct nfqnl_instance *
|
||||
__instance_lookup(u_int16_t queue_num)
|
||||
instance_lookup(u_int16_t queue_num)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *pos;
|
||||
struct nfqnl_instance *inst;
|
||||
|
||||
head = &instance_table[instance_hashfn(queue_num)];
|
||||
hlist_for_each_entry(inst, pos, head, hlist) {
|
||||
hlist_for_each_entry_rcu(inst, pos, head, hlist) {
|
||||
if (inst->queue_num == queue_num)
|
||||
return inst;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct nfqnl_instance *
|
||||
instance_lookup_get(u_int16_t queue_num)
|
||||
{
|
||||
struct nfqnl_instance *inst;
|
||||
|
||||
read_lock_bh(&instances_lock);
|
||||
inst = __instance_lookup(queue_num);
|
||||
if (inst)
|
||||
atomic_inc(&inst->use);
|
||||
read_unlock_bh(&instances_lock);
|
||||
|
||||
return inst;
|
||||
}
|
||||
|
||||
static void
|
||||
instance_put(struct nfqnl_instance *inst)
|
||||
{
|
||||
if (inst && atomic_dec_and_test(&inst->use)) {
|
||||
QDEBUG("kfree(inst=%p)\n", inst);
|
||||
kfree(inst);
|
||||
}
|
||||
}
|
||||
|
||||
static struct nfqnl_instance *
|
||||
instance_create(u_int16_t queue_num, int pid)
|
||||
{
|
||||
struct nfqnl_instance *inst;
|
||||
unsigned int h;
|
||||
|
||||
QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid);
|
||||
|
||||
write_lock_bh(&instances_lock);
|
||||
if (__instance_lookup(queue_num)) {
|
||||
spin_lock(&instances_lock);
|
||||
if (instance_lookup(queue_num)) {
|
||||
inst = NULL;
|
||||
QDEBUG("aborting, instance already exists\n");
|
||||
goto out_unlock;
|
||||
|
@ -139,18 +117,17 @@ instance_create(u_int16_t queue_num, int pid)
|
|||
inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
|
||||
inst->copy_range = 0xfffff;
|
||||
inst->copy_mode = NFQNL_COPY_NONE;
|
||||
/* needs to be two, since we _put() after creation */
|
||||
atomic_set(&inst->use, 2);
|
||||
spin_lock_init(&inst->lock);
|
||||
INIT_LIST_HEAD(&inst->queue_list);
|
||||
INIT_RCU_HEAD(&inst->rcu);
|
||||
|
||||
if (!try_module_get(THIS_MODULE))
|
||||
goto out_free;
|
||||
|
||||
hlist_add_head(&inst->hlist,
|
||||
&instance_table[instance_hashfn(queue_num)]);
|
||||
h = instance_hashfn(queue_num);
|
||||
hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
|
||||
|
||||
write_unlock_bh(&instances_lock);
|
||||
spin_unlock(&instances_lock);
|
||||
|
||||
QDEBUG("successfully created new instance\n");
|
||||
|
||||
|
@ -159,7 +136,7 @@ instance_create(u_int16_t queue_num, int pid)
|
|||
out_free:
|
||||
kfree(inst);
|
||||
out_unlock:
|
||||
write_unlock_bh(&instances_lock);
|
||||
spin_unlock(&instances_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -167,38 +144,29 @@ static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
|
|||
unsigned long data);
|
||||
|
||||
static void
|
||||
_instance_destroy2(struct nfqnl_instance *inst, int lock)
|
||||
instance_destroy_rcu(struct rcu_head *head)
|
||||
{
|
||||
/* first pull it out of the global list */
|
||||
if (lock)
|
||||
write_lock_bh(&instances_lock);
|
||||
struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
|
||||
rcu);
|
||||
|
||||
QDEBUG("removing instance %p (queuenum=%u) from hash\n",
|
||||
inst, inst->queue_num);
|
||||
hlist_del(&inst->hlist);
|
||||
|
||||
if (lock)
|
||||
write_unlock_bh(&instances_lock);
|
||||
|
||||
/* then flush all pending skbs from the queue */
|
||||
nfqnl_flush(inst, NULL, 0);
|
||||
|
||||
/* and finally put the refcount */
|
||||
instance_put(inst);
|
||||
|
||||
kfree(inst);
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
||||
static inline void
|
||||
static void
|
||||
__instance_destroy(struct nfqnl_instance *inst)
|
||||
{
|
||||
_instance_destroy2(inst, 0);
|
||||
hlist_del_rcu(&inst->hlist);
|
||||
call_rcu(&inst->rcu, instance_destroy_rcu);
|
||||
}
|
||||
|
||||
static inline void
|
||||
static void
|
||||
instance_destroy(struct nfqnl_instance *inst)
|
||||
{
|
||||
_instance_destroy2(inst, 1);
|
||||
spin_lock(&instances_lock);
|
||||
__instance_destroy(inst);
|
||||
spin_unlock(&instances_lock);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -485,7 +453,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
|
|||
|
||||
QDEBUG("entered\n");
|
||||
|
||||
queue = instance_lookup_get(queuenum);
|
||||
/* rcu_read_lock()ed by nf_hook_slow() */
|
||||
queue = instance_lookup(queuenum);
|
||||
if (!queue) {
|
||||
QDEBUG("no queue instance matching\n");
|
||||
return -EINVAL;
|
||||
|
@ -493,13 +462,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
|
|||
|
||||
if (queue->copy_mode == NFQNL_COPY_NONE) {
|
||||
QDEBUG("mode COPY_NONE, aborting\n");
|
||||
status = -EAGAIN;
|
||||
goto err_out_put;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
nskb = nfqnl_build_packet_message(queue, entry, &status);
|
||||
if (nskb == NULL)
|
||||
goto err_out_put;
|
||||
return status;
|
||||
|
||||
spin_lock_bh(&queue->lock);
|
||||
|
||||
|
@ -526,7 +494,6 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
|
|||
__enqueue_entry(queue, entry);
|
||||
|
||||
spin_unlock_bh(&queue->lock);
|
||||
instance_put(queue);
|
||||
return status;
|
||||
|
||||
err_out_free_nskb:
|
||||
|
@ -534,9 +501,6 @@ err_out_free_nskb:
|
|||
|
||||
err_out_unlock:
|
||||
spin_unlock_bh(&queue->lock);
|
||||
|
||||
err_out_put:
|
||||
instance_put(queue);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
@ -616,21 +580,18 @@ nfqnl_dev_drop(int ifindex)
|
|||
|
||||
QDEBUG("entering for ifindex %u\n", ifindex);
|
||||
|
||||
/* this only looks like we have to hold the readlock for a way too long
|
||||
* time, issue_verdict(), nf_reinject(), ... - but we always only
|
||||
* issue NF_DROP, which is processed directly in nf_reinject() */
|
||||
read_lock_bh(&instances_lock);
|
||||
rcu_read_lock();
|
||||
|
||||
for (i = 0; i < INSTANCE_BUCKETS; i++) {
|
||||
struct hlist_node *tmp;
|
||||
struct nfqnl_instance *inst;
|
||||
struct hlist_head *head = &instance_table[i];
|
||||
|
||||
hlist_for_each_entry(inst, tmp, head, hlist)
|
||||
hlist_for_each_entry_rcu(inst, tmp, head, hlist)
|
||||
nfqnl_flush(inst, dev_cmp, ifindex);
|
||||
}
|
||||
|
||||
read_unlock_bh(&instances_lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
|
||||
|
@ -665,7 +626,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
|
|||
int i;
|
||||
|
||||
/* destroy all instances for this pid */
|
||||
write_lock_bh(&instances_lock);
|
||||
spin_lock(&instances_lock);
|
||||
for (i = 0; i < INSTANCE_BUCKETS; i++) {
|
||||
struct hlist_node *tmp, *t2;
|
||||
struct nfqnl_instance *inst;
|
||||
|
@ -677,7 +638,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
|
|||
__instance_destroy(inst);
|
||||
}
|
||||
}
|
||||
write_unlock_bh(&instances_lock);
|
||||
spin_unlock(&instances_lock);
|
||||
}
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
@ -705,18 +666,21 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
|
|||
struct nf_queue_entry *entry;
|
||||
int err;
|
||||
|
||||
queue = instance_lookup_get(queue_num);
|
||||
if (!queue)
|
||||
return -ENODEV;
|
||||
rcu_read_lock();
|
||||
queue = instance_lookup(queue_num);
|
||||
if (!queue) {
|
||||
err = -ENODEV;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
|
||||
if (queue->peer_pid != NETLINK_CB(skb).pid) {
|
||||
err = -EPERM;
|
||||
goto err_out_put;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
|
||||
if (!nfqa[NFQA_VERDICT_HDR]) {
|
||||
err = -EINVAL;
|
||||
goto err_out_put;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
|
||||
vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
|
||||
|
@ -724,14 +688,15 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
|
|||
|
||||
if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
|
||||
err = -EINVAL;
|
||||
goto err_out_put;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
|
||||
entry = find_dequeue_entry(queue, ntohl(vhdr->id));
|
||||
if (entry == NULL) {
|
||||
err = -ENOENT;
|
||||
goto err_out_put;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (nfqa[NFQA_PAYLOAD]) {
|
||||
if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
|
||||
|
@ -744,11 +709,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
|
|||
nla_data(nfqa[NFQA_MARK]));
|
||||
|
||||
nf_reinject(entry, verdict);
|
||||
instance_put(queue);
|
||||
return 0;
|
||||
|
||||
err_out_put:
|
||||
instance_put(queue);
|
||||
err_out_unlock:
|
||||
rcu_read_unlock();
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -776,45 +740,61 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
|||
struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
|
||||
u_int16_t queue_num = ntohs(nfmsg->res_id);
|
||||
struct nfqnl_instance *queue;
|
||||
struct nfqnl_msg_config_cmd *cmd = NULL;
|
||||
int ret = 0;
|
||||
|
||||
QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type));
|
||||
|
||||
queue = instance_lookup_get(queue_num);
|
||||
if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
|
||||
ret = -EPERM;
|
||||
goto out_put;
|
||||
if (nfqa[NFQA_CFG_CMD]) {
|
||||
cmd = nla_data(nfqa[NFQA_CFG_CMD]);
|
||||
|
||||
/* Commands without queue context - might sleep */
|
||||
switch (cmd->command) {
|
||||
case NFQNL_CFG_CMD_PF_BIND:
|
||||
ret = nf_register_queue_handler(ntohs(cmd->pf),
|
||||
&nfqh);
|
||||
break;
|
||||
case NFQNL_CFG_CMD_PF_UNBIND:
|
||||
ret = nf_unregister_queue_handler(ntohs(cmd->pf),
|
||||
&nfqh);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (nfqa[NFQA_CFG_CMD]) {
|
||||
struct nfqnl_msg_config_cmd *cmd;
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
cmd = nla_data(nfqa[NFQA_CFG_CMD]);
|
||||
QDEBUG("found CFG_CMD\n");
|
||||
rcu_read_lock();
|
||||
queue = instance_lookup(queue_num);
|
||||
if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
|
||||
ret = -EPERM;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
|
||||
if (cmd != NULL) {
|
||||
switch (cmd->command) {
|
||||
case NFQNL_CFG_CMD_BIND:
|
||||
if (queue)
|
||||
return -EBUSY;
|
||||
|
||||
if (queue) {
|
||||
ret = -EBUSY;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
queue = instance_create(queue_num, NETLINK_CB(skb).pid);
|
||||
if (!queue)
|
||||
return -EINVAL;
|
||||
if (!queue) {
|
||||
ret = -EINVAL;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
break;
|
||||
case NFQNL_CFG_CMD_UNBIND:
|
||||
if (!queue)
|
||||
return -ENODEV;
|
||||
if (!queue) {
|
||||
ret = -ENODEV;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
instance_destroy(queue);
|
||||
break;
|
||||
case NFQNL_CFG_CMD_PF_BIND:
|
||||
QDEBUG("registering queue handler for pf=%u\n",
|
||||
ntohs(cmd->pf));
|
||||
ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh);
|
||||
break;
|
||||
case NFQNL_CFG_CMD_PF_UNBIND:
|
||||
QDEBUG("unregistering queue handler for pf=%u\n",
|
||||
ntohs(cmd->pf));
|
||||
ret = nf_unregister_queue_handler(ntohs(cmd->pf), &nfqh);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
|
@ -827,7 +807,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
|||
|
||||
if (!queue) {
|
||||
ret = -ENODEV;
|
||||
goto out_put;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
params = nla_data(nfqa[NFQA_CFG_PARAMS]);
|
||||
nfqnl_set_mode(queue, params->copy_mode,
|
||||
|
@ -839,7 +819,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
|||
|
||||
if (!queue) {
|
||||
ret = -ENODEV;
|
||||
goto out_put;
|
||||
goto err_out_unlock;
|
||||
}
|
||||
queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
|
||||
spin_lock_bh(&queue->lock);
|
||||
|
@ -847,8 +827,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
|
|||
spin_unlock_bh(&queue->lock);
|
||||
}
|
||||
|
||||
out_put:
|
||||
instance_put(queue);
|
||||
err_out_unlock:
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -916,7 +896,7 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
|
|||
|
||||
static void *seq_start(struct seq_file *seq, loff_t *pos)
|
||||
{
|
||||
read_lock_bh(&instances_lock);
|
||||
spin_lock(&instances_lock);
|
||||
return get_idx(seq, *pos);
|
||||
}
|
||||
|
||||
|
@ -928,7 +908,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
|
|||
|
||||
static void seq_stop(struct seq_file *s, void *v)
|
||||
{
|
||||
read_unlock_bh(&instances_lock);
|
||||
spin_unlock(&instances_lock);
|
||||
}
|
||||
|
||||
static int seq_show(struct seq_file *s, void *v)
|
||||
|
@ -940,8 +920,7 @@ static int seq_show(struct seq_file *s, void *v)
|
|||
inst->peer_pid, inst->queue_total,
|
||||
inst->copy_mode, inst->copy_range,
|
||||
inst->queue_dropped, inst->queue_user_dropped,
|
||||
inst->id_sequence,
|
||||
atomic_read(&inst->use));
|
||||
inst->id_sequence, 1);
|
||||
}
|
||||
|
||||
static const struct seq_operations nfqnl_seq_ops = {
|
||||
|
|
Loading…
Reference in a new issue