virtio: fixes on top of 4.4-rc4
This includes some fixes and cleanups in virtio and vhost code. Most
notably, shadowing the index fixes the excessive cacheline bouncing
observed on AMD platforms.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAABAgAGBQJWZaWhAAoJECgfDbjSjVRpWgkH/27ol7FAizFI6Dli6YjdGk/P
PJwSE/BR1ki/YohvUAjRZyyb1joBcVXC6rgy/5NFwWW6EfeZoAFaJsEgJO4jcxxw
UizVUnVVQ2UFmJ+IsCFMQCYf6IaGgFegGABjJNvyVhn+XeHhxkW690Z6GKy/IOx7
tueq1HaHeXt9Edx7D2cUsp4INmVU51fwrLmS72Z4S87W9pQ0IKG9C8Q1n+pq/9MD
1PqoqmqVgRjLwndV4YyXCGdJRKIh7SvDTGflm6vyluIbwnFLiz5G59SP4p0GJd6L
TYhqL8g+kUEjYOyvPqfoFA+aPrOTmCEfsflrpIk6miOwP3QOVbQhEsgauxL/ivo=
=owan
-----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio fixes from Michael Tsirkin:
 "This includes some fixes and cleanups in virtio and vhost code.

  Most notably, shadowing the index fixes the excessive cacheline
  bouncing observed on AMD platforms"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  virtio_ring: shadow available ring flags & index
  virtio: Do not drop __GFP_HIGH in alloc_indirect
  vhost: replace % with & on data path
  tools/virtio: fix byteswap logic
  tools/virtio: move list macro stubs
  virtio: fix memory leak of virtio ida cache layers
  vhost: relax log address alignment
  virtio-net: Stop doing DMA from the stack
commit 62ea1ec5e1

7 changed files with 78 additions and 45 deletions
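For readers skimming the diff below: the cacheline-bouncing fix keeps a guest-private ("shadow") copy of the available ring index and flags, so the guest publishes buffers without ever re-reading memory the host is polling. A minimal sketch of the idea, using simplified stand-in types rather than the kernel's structures:

/* Sketch only: simplified stand-ins for vring_avail / vring_virtqueue. */
struct avail_ring {
	unsigned short flags;            /* shared with the host */
	unsigned short idx;              /* shared with the host */
	unsigned short ring[256];        /* shared with the host */
};

struct guest_vq {
	struct avail_ring *avail;
	unsigned short avail_idx_shadow; /* guest-private copy of avail->idx */
	unsigned short num;              /* ring size, a power of two */
};

static void publish_buffer(struct guest_vq *vq, unsigned short head)
{
	/* Index with the shadow instead of re-reading shared memory... */
	vq->avail->ring[vq->avail_idx_shadow & (vq->num - 1)] = head;
	/* (a write barrier goes here in the real code) */
	/* ...then bump the shadow and write the shared index once. */
	vq->avail_idx_shadow++;
	vq->avail->idx = vq->avail_idx_shadow;
}

Because the shared cacheline is now only written, never read, on this path, it stops ping-ponging between the CPUs running the guest and the host.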
drivers/net/virtio_net.c

@@ -140,6 +140,12 @@ struct virtnet_info {
 
 	/* CPU hot plug notifier */
 	struct notifier_block nb;
+
+	/* Control VQ buffers: protected by the rtnl lock */
+	struct virtio_net_ctrl_hdr ctrl_hdr;
+	virtio_net_ctrl_ack ctrl_status;
+	u8 ctrl_promisc;
+	u8 ctrl_allmulti;
 };
 
 struct padded_vnet_hdr {
@@ -976,31 +982,30 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 				 struct scatterlist *out)
 {
 	struct scatterlist *sgs[4], hdr, stat;
-	struct virtio_net_ctrl_hdr ctrl;
-	virtio_net_ctrl_ack status = ~0;
 	unsigned out_num = 0, tmp;
 
 	/* Caller should know better */
 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
 
-	ctrl.class = class;
-	ctrl.cmd = cmd;
+	vi->ctrl_status = ~0;
+	vi->ctrl_hdr.class = class;
+	vi->ctrl_hdr.cmd = cmd;
 	/* Add header */
-	sg_init_one(&hdr, &ctrl, sizeof(ctrl));
+	sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
 	sgs[out_num++] = &hdr;
 
 	if (out)
 		sgs[out_num++] = out;
 
 	/* Add return status. */
-	sg_init_one(&stat, &status, sizeof(status));
+	sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status));
 	sgs[out_num] = &stat;
 
 	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
 	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
 
 	if (unlikely(!virtqueue_kick(vi->cvq)))
-		return status == VIRTIO_NET_OK;
+		return vi->ctrl_status == VIRTIO_NET_OK;
 
 	/* Spin for a response, the kick causes an ioport write, trapping
 	 * into the hypervisor, so the request should be handled immediately.
@@ -1009,7 +1014,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	       !virtqueue_is_broken(vi->cvq))
 		cpu_relax();
 
-	return status == VIRTIO_NET_OK;
+	return vi->ctrl_status == VIRTIO_NET_OK;
 }
 
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
@@ -1151,7 +1156,6 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct scatterlist sg[2];
-	u8 promisc, allmulti;
 	struct virtio_net_ctrl_mac *mac_data;
 	struct netdev_hw_addr *ha;
 	int uc_count;
@@ -1163,22 +1167,22 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
-	promisc = ((dev->flags & IFF_PROMISC) != 0);
-	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
+	vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0);
+	vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
 
-	sg_init_one(sg, &promisc, sizeof(promisc));
+	sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
 		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
-			 promisc ? "en" : "dis");
+			 vi->ctrl_promisc ? "en" : "dis");
 
-	sg_init_one(sg, &allmulti, sizeof(allmulti));
+	sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
-			 allmulti ? "en" : "dis");
+			 vi->ctrl_allmulti ? "en" : "dis");
 
 	uc_count = netdev_uc_count(dev);
 	mc_count = netdev_mc_count(dev);
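The virtio_net.c hunks above move the control-queue buffers out of the stack frames of virtnet_send_command() and virtnet_set_rx_mode() and into the long-lived, rtnl-protected virtnet_info, because DMA from stack memory is not safe once virtio goes through the DMA API. A simplified before/after sketch (hypothetical helper names, not the driver's functions):

/* Before: the header the device reads lives in the caller's stack frame. */
static bool send_cmd_from_stack(struct virtnet_info *vi, u8 class, u8 cmd)
{
	struct virtio_net_ctrl_hdr ctrl = { .class = class, .cmd = cmd };
	struct scatterlist hdr;

	sg_init_one(&hdr, &ctrl, sizeof(ctrl));	/* points into the stack */
	/* ... virtqueue_add_sgs(vi->cvq, ...) and kick ... */
	return true;
}

/* After: the header lives in the kmalloc'ed, rtnl-serialized virtnet_info. */
static bool send_cmd_from_priv(struct virtnet_info *vi, u8 class, u8 cmd)
{
	struct scatterlist hdr;

	vi->ctrl_hdr.class = class;
	vi->ctrl_hdr.cmd = cmd;
	sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
	/* ... virtqueue_add_sgs(vi->cvq, ...) and kick ... */
	return true;
}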
drivers/vhost/vhost.c

@@ -819,7 +819,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
 		BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
 		if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
 		    (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
-		    (a.log_guest_addr & (sizeof(u64) - 1))) {
+		    (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) {
 			r = -EINVAL;
 			break;
 		}
@@ -1369,7 +1369,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 	/* Grab the next descriptor number they're advertising, and increment
 	 * the index we've seen. */
 	if (unlikely(__get_user(ring_head,
-				&vq->avail->ring[last_avail_idx % vq->num]))) {
+				&vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
 		vq_err(vq, "Failed to read head: idx %d address %p\n",
 		       last_avail_idx,
 		       &vq->avail->ring[last_avail_idx % vq->num]);
@@ -1489,7 +1489,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
 	u16 old, new;
 	int start;
 
-	start = vq->last_used_idx % vq->num;
+	start = vq->last_used_idx & (vq->num - 1);
 	used = vq->used->ring + start;
 	if (count == 1) {
 		if (__put_user(heads[0].id, &used->id)) {
@@ -1531,7 +1531,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 {
 	int start, n, r;
 
-	start = vq->last_used_idx % vq->num;
+	start = vq->last_used_idx & (vq->num - 1);
 	n = vq->num - start;
 	if (n < count) {
 		r = __vhost_add_used_n(vq, heads, n);
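The % to & replacements above are safe because a virtio ring size is always a power of two, so masking with (num - 1) wraps exactly like a modulo while avoiding a division on the hot path. A quick self-contained check of that identity (not from the patch):

#include <assert.h>

int main(void)
{
	unsigned int num = 256;	/* virtqueue sizes are powers of two */
	unsigned int idx;

	for (idx = 0; idx < 4 * num; idx++)
		assert((idx % num) == (idx & (num - 1)));
	return 0;
}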
drivers/virtio/virtio.c

@@ -412,6 +412,7 @@ static int virtio_init(void)
 static void __exit virtio_exit(void)
 {
 	bus_unregister(&virtio_bus);
+	ida_destroy(&virtio_index_ida);
 }
 core_initcall(virtio_init);
 module_exit(virtio_exit);
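The one-liner above fixes a leak on module unload: returning individual ids to an IDA does not free its internal cache layers; only ida_destroy() does. A rough sketch of the lifecycle with illustrative names (virtio.c itself uses virtio_index_ida for device index allocation):

#include <linux/idr.h>

static DEFINE_IDA(example_ida);

static int example_use_id(void)
{
	/* Allocate and immediately release an id; the IDA keeps its
	 * internal layer cache even after the id is returned. */
	int id = ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);

	if (id < 0)
		return id;
	ida_simple_remove(&example_ida, id);
	return 0;
}

static void example_exit(void)
{
	/* Without this, the cached layers are leaked at module unload. */
	ida_destroy(&example_ida);
}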
drivers/virtio/virtio_ring.c

@@ -80,6 +80,12 @@ struct vring_virtqueue {
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
+	/* Last written value to avail->flags */
+	u16 avail_flags_shadow;
+
+	/* Last written value to avail->idx in guest byte order */
+	u16 avail_idx_shadow;
+
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
 
@@ -109,7 +115,7 @@ static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
 	 * otherwise virt_to_phys will give us bogus addresses in the
 	 * virtqueue.
 	 */
-	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
+	gfp &= ~__GFP_HIGHMEM;
 
 	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
 	if (!desc)
@@ -235,13 +241,14 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
-	avail = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) & (vq->vring.num - 1);
+	avail = vq->avail_idx_shadow & (vq->vring.num - 1);
 	vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
 	virtio_wmb(vq->weak_barriers);
-	vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) + 1);
+	vq->avail_idx_shadow++;
+	vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
 	vq->num_added++;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -354,8 +361,8 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
 	 * event. */
 	virtio_mb(vq->weak_barriers);
 
-	old = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->num_added;
-	new = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx);
+	old = vq->avail_idx_shadow - vq->num_added;
+	new = vq->avail_idx_shadow;
 	vq->num_added = 0;
 
 #ifdef DEBUG
@@ -510,7 +517,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	/* If we expect an interrupt for the next entry, tell host
 	 * by writing event index and flush out the write before
 	 * the read in the next get_buf call. */
-	if (!(vq->vring.avail->flags & cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT))) {
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx);
 		virtio_mb(vq->weak_barriers);
 	}
@@ -537,7 +544,11 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	vq->vring.avail->flags |= cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT);
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
+
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
@@ -565,7 +576,10 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 	 * either clear the flags bit or point the event index at the next
 	 * entry. Always do both to keep code simple. */
-	vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
 	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
 	END_USE(vq);
 	return last_used_idx;
@@ -633,9 +647,12 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
 	 * either clear the flags bit or point the event index at the next
 	 * entry. Always do both to keep code simple. */
-	vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
 	/* TODO: tune this threshold */
-	bufs = (u16)(virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->last_used_idx) * 3 / 4;
+	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
 	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs);
 	virtio_mb(vq->weak_barriers);
 	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
@@ -670,7 +687,8 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 		/* detach_buf clears data, so grab it now. */
 		buf = vq->data[i];
 		detach_buf(vq, i);
-		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - 1);
+		vq->avail_idx_shadow--;
+		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
 		END_USE(vq);
 		return buf;
 	}
@@ -735,6 +753,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
+	vq->avail_flags_shadow = 0;
+	vq->avail_idx_shadow = 0;
 	vq->num_added = 0;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
@@ -746,8 +766,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
 	/* No callback? Tell other side not to bother us. */
-	if (!callback)
-		vq->vring.avail->flags |= cpu_to_virtio16(vdev, VRING_AVAIL_F_NO_INTERRUPT);
+	if (!callback) {
+		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
+	}
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
tools/virtio/linux/kernel.h

@@ -110,4 +110,10 @@ static inline void free_page(unsigned long addr)
 	(void) (&_min1 == &_min2);		\
 	_min1 < _min2 ? _min1 : _min2; })
 
+/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
+#define list_add_tail(a, b) do {} while (0)
+#define list_del(a) do {} while (0)
+#define list_for_each_entry(a, b, c) while (0)
+/* end of stubs */
+
 #endif /* KERNEL_H */
tools/virtio/linux/virtio.h

@@ -3,12 +3,6 @@
 #include <linux/scatterlist.h>
 #include <linux/kernel.h>
 
-/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
-#define list_add_tail(a, b) do {} while (0)
-#define list_del(a) do {} while (0)
-#define list_for_each_entry(a, b, c) while (0)
-/* end of stubs */
-
 struct virtio_device {
 	void *dev;
 	u64 features;
tools/virtio/linux/virtio_config.h

@@ -40,33 +40,39 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
 #define virtio_has_feature(dev, feature) \
 	(__virtio_test_bit((dev), feature))
 
+static inline bool virtio_is_little_endian(struct virtio_device *vdev)
+{
+	return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
+		virtio_legacy_is_little_endian();
+}
+
 /* Memory accessors */
 static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
 {
-	return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __virtio16_to_cpu(virtio_is_little_endian(vdev), val);
 }
 
 static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
 {
-	return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __cpu_to_virtio16(virtio_is_little_endian(vdev), val);
 }
 
 static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
 {
-	return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __virtio32_to_cpu(virtio_is_little_endian(vdev), val);
 }
 
 static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
 {
-	return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __cpu_to_virtio32(virtio_is_little_endian(vdev), val);
 }
 
 static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
 {
-	return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __virtio64_to_cpu(virtio_is_little_endian(vdev), val);
 }
 
 static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
 {
-	return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
 }
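The virtio_config.h change fixes the byteswap logic for legacy devices: a VIRTIO 1.0 device is always little-endian, while a legacy device uses the guest's native byte order, which virtio_legacy_is_little_endian() reports. A rough userspace model of the decision the accessors make, using assumed helper names rather than the tools' actual macros:

#include <endian.h>
#include <stdbool.h>
#include <stdint.h>

/* Model only: 'little_endian' is the value virtio_is_little_endian()
 * computes above -- true for VIRTIO 1.0 devices, and for legacy devices
 * when the guest itself is little-endian. */
static inline uint16_t model_virtio16_to_cpu(bool little_endian, uint16_t val)
{
	return little_endian ? le16toh(val) : be16toh(val);
}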