virtio-scsi: replace target spinlock with seqcount
The tgt_lock spinlock is used only to serialize reads and writes of req_vq, so a lockless seqcount is sufficient for that purpose. On a 16-core VM with a vhost-scsi backend, this patch improves IOPS by 3% on a random read test. Signed-off-by: Ming Lei <ming.lei@canonical.com> [Add initialization in virtscsi_target_alloc. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
parent
0758f4f732
commit
938ece711c
1 changed files with 29 additions and 13 deletions
|
@ -27,6 +27,7 @@
|
|||
#include <scsi/scsi_host.h>
|
||||
#include <scsi/scsi_device.h>
|
||||
#include <scsi/scsi_cmnd.h>
|
||||
#include <linux/seqlock.h>
|
||||
|
||||
#define VIRTIO_SCSI_MEMPOOL_SZ 64
|
||||
#define VIRTIO_SCSI_EVENT_LEN 8
|
||||
|
@ -75,18 +76,16 @@ struct virtio_scsi_vq {
|
|||
* queue, and also lets the driver optimize the IRQ affinity for the virtqueues
|
||||
* (each virtqueue's affinity is set to the CPU that "owns" the queue).
|
||||
*
|
||||
* tgt_lock is held to serialize reading and writing req_vq. Reading req_vq
|
||||
* could be done locklessly, but we do not do it yet.
|
||||
* tgt_seq is a seqcount used to serialize reading and writing req_vq.
|
||||
*
|
||||
* Decrements of reqs are never concurrent with writes of req_vq: before the
|
||||
* decrement reqs will be != 0; after the decrement the virtqueue completion
|
||||
* routine will not use the req_vq so it can be changed by a new request.
|
||||
* Thus they can happen outside the tgt_lock, provided of course we make reqs
|
||||
* Thus they can happen outside the tgt_seq, provided of course we make reqs
|
||||
* an atomic_t.
|
||||
*/
|
||||
struct virtio_scsi_target_state {
|
||||
/* This spinlock never held at the same time as vq_lock. */
|
||||
spinlock_t tgt_lock;
|
||||
seqcount_t tgt_seq;
|
||||
|
||||
/* Count of outstanding requests. */
|
||||
atomic_t reqs;
|
||||
|
@ -559,19 +558,33 @@ static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi,
|
|||
unsigned long flags;
|
||||
u32 queue_num;
|
||||
|
||||
spin_lock_irqsave(&tgt->tgt_lock, flags);
|
||||
local_irq_save(flags);
|
||||
if (atomic_inc_return(&tgt->reqs) > 1) {
|
||||
unsigned long seq;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&tgt->tgt_seq);
|
||||
vq = tgt->req_vq;
|
||||
} while (read_seqcount_retry(&tgt->tgt_seq, seq));
|
||||
} else {
|
||||
/* no writes can be concurrent because of atomic_t */
|
||||
write_seqcount_begin(&tgt->tgt_seq);
|
||||
|
||||
/* keep previous req_vq if a reader just arrived */
|
||||
if (unlikely(atomic_read(&tgt->reqs) > 1)) {
|
||||
vq = tgt->req_vq;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (atomic_inc_return(&tgt->reqs) > 1)
|
||||
vq = tgt->req_vq;
|
||||
else {
|
||||
queue_num = smp_processor_id();
|
||||
while (unlikely(queue_num >= vscsi->num_queues))
|
||||
queue_num -= vscsi->num_queues;
|
||||
|
||||
tgt->req_vq = vq = &vscsi->req_vqs[queue_num];
|
||||
unlock:
|
||||
write_seqcount_end(&tgt->tgt_seq);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
|
||||
spin_unlock_irqrestore(&tgt->tgt_lock, flags);
|
||||
return vq;
|
||||
}
|
||||
|
||||
|
@ -667,14 +680,17 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
|
|||
|
||||
static int virtscsi_target_alloc(struct scsi_target *starget)
|
||||
{
|
||||
struct Scsi_Host *sh = dev_to_shost(starget->dev.parent);
|
||||
struct virtio_scsi *vscsi = shost_priv(sh);
|
||||
|
||||
struct virtio_scsi_target_state *tgt =
|
||||
kmalloc(sizeof(*tgt), GFP_KERNEL);
|
||||
if (!tgt)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_init(&tgt->tgt_lock);
|
||||
seqcount_init(&tgt->tgt_seq);
|
||||
atomic_set(&tgt->reqs, 0);
|
||||
tgt->req_vq = NULL;
|
||||
tgt->req_vq = &vscsi->req_vqs[0];
|
||||
|
||||
starget->hostdata = tgt;
|
||||
return 0;
|
||||
|
|
Loading…
Reference in a new issue