virtio: fixes on top of 4.4-rc4

This includes some fixes and cleanups in virtio and vhost code. Most notably,
 shadowing the index fixes the excessive cacheline bouncing observed on AMD
 platforms.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJWZaWhAAoJECgfDbjSjVRpWgkH/27ol7FAizFI6Dli6YjdGk/P
 PJwSE/BR1ki/YohvUAjRZyyb1joBcVXC6rgy/5NFwWW6EfeZoAFaJsEgJO4jcxxw
 UizVUnVVQ2UFmJ+IsCFMQCYf6IaGgFegGABjJNvyVhn+XeHhxkW690Z6GKy/IOx7
 tueq1HaHeXt9Edx7D2cUsp4INmVU51fwrLmS72Z4S87W9pQ0IKG9C8Q1n+pq/9MD
 1PqoqmqVgRjLwndV4YyXCGdJRKIh7SvDTGflm6vyluIbwnFLiz5G59SP4p0GJd6L
 TYhqL8g+kUEjYOyvPqfoFA+aPrOTmCEfsflrpIk6miOwP3QOVbQhEsgauxL/ivo=
 =owan
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio fixes from Michael Tsirkin:
 "This includes some fixes and cleanups in virtio and vhost code.

  Most notably, shadowing the index fixes the excessive cacheline
  bouncing observed on AMD platforms"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  virtio_ring: shadow available ring flags & index
  virtio: Do not drop __GFP_HIGH in alloc_indirect
  vhost: replace % with & on data path
  tools/virtio: fix byteswap logic
  tools/virtio: move list macro stubs
  virtio: fix memory leak of virtio ida cache layers
  vhost: relax log address alignment
  virtio-net: Stop doing DMA from the stack
This commit is contained in:
Linus Torvalds 2015-12-07 10:50:07 -08:00
commit 62ea1ec5e1
7 changed files with 78 additions and 45 deletions

View file

@ -140,6 +140,12 @@ struct virtnet_info {
/* CPU hot plug notifier */
struct notifier_block nb;
/* Control VQ buffers: protected by the rtnl lock */
struct virtio_net_ctrl_hdr ctrl_hdr;
virtio_net_ctrl_ack ctrl_status;
u8 ctrl_promisc;
u8 ctrl_allmulti;
};
struct padded_vnet_hdr {
@ -976,31 +982,30 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
struct scatterlist *out)
{
struct scatterlist *sgs[4], hdr, stat;
struct virtio_net_ctrl_hdr ctrl;
virtio_net_ctrl_ack status = ~0;
unsigned out_num = 0, tmp;
/* Caller should know better */
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
ctrl.class = class;
ctrl.cmd = cmd;
vi->ctrl_status = ~0;
vi->ctrl_hdr.class = class;
vi->ctrl_hdr.cmd = cmd;
/* Add header */
sg_init_one(&hdr, &ctrl, sizeof(ctrl));
sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
sgs[out_num++] = &hdr;
if (out)
sgs[out_num++] = out;
/* Add return status. */
sg_init_one(&stat, &status, sizeof(status));
sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status));
sgs[out_num] = &stat;
BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
if (unlikely(!virtqueue_kick(vi->cvq)))
return status == VIRTIO_NET_OK;
return vi->ctrl_status == VIRTIO_NET_OK;
/* Spin for a response, the kick causes an ioport write, trapping
* into the hypervisor, so the request should be handled immediately.
@ -1009,7 +1014,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
!virtqueue_is_broken(vi->cvq))
cpu_relax();
return status == VIRTIO_NET_OK;
return vi->ctrl_status == VIRTIO_NET_OK;
}
static int virtnet_set_mac_address(struct net_device *dev, void *p)
@ -1151,7 +1156,6 @@ static void virtnet_set_rx_mode(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
struct scatterlist sg[2];
u8 promisc, allmulti;
struct virtio_net_ctrl_mac *mac_data;
struct netdev_hw_addr *ha;
int uc_count;
@ -1163,22 +1167,22 @@ static void virtnet_set_rx_mode(struct net_device *dev)
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
return;
promisc = ((dev->flags & IFF_PROMISC) != 0);
allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0);
vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
sg_init_one(sg, &promisc, sizeof(promisc));
sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
VIRTIO_NET_CTRL_RX_PROMISC, sg))
dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
promisc ? "en" : "dis");
vi->ctrl_promisc ? "en" : "dis");
sg_init_one(sg, &allmulti, sizeof(allmulti));
sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
allmulti ? "en" : "dis");
vi->ctrl_allmulti ? "en" : "dis");
uc_count = netdev_uc_count(dev);
mc_count = netdev_mc_count(dev);

View file

@ -819,7 +819,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
(a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
(a.log_guest_addr & (sizeof(u64) - 1))) {
(a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) {
r = -EINVAL;
break;
}
@ -1369,7 +1369,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
/* Grab the next descriptor number they're advertising, and increment
* the index we've seen. */
if (unlikely(__get_user(ring_head,
&vq->avail->ring[last_avail_idx % vq->num]))) {
&vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
vq_err(vq, "Failed to read head: idx %d address %p\n",
last_avail_idx,
&vq->avail->ring[last_avail_idx % vq->num]);
@ -1489,7 +1489,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
u16 old, new;
int start;
start = vq->last_used_idx % vq->num;
start = vq->last_used_idx & (vq->num - 1);
used = vq->used->ring + start;
if (count == 1) {
if (__put_user(heads[0].id, &used->id)) {
@ -1531,7 +1531,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
{
int start, n, r;
start = vq->last_used_idx % vq->num;
start = vq->last_used_idx & (vq->num - 1);
n = vq->num - start;
if (n < count) {
r = __vhost_add_used_n(vq, heads, n);

View file

@ -412,6 +412,7 @@ static int virtio_init(void)
static void __exit virtio_exit(void)
{
bus_unregister(&virtio_bus);
ida_destroy(&virtio_index_ida);
}
core_initcall(virtio_init);
module_exit(virtio_exit);

View file

@ -80,6 +80,12 @@ struct vring_virtqueue {
/* Last used index we've seen. */
u16 last_used_idx;
/* Last written value to avail->flags */
u16 avail_flags_shadow;
/* Last written value to avail->idx in guest byte order */
u16 avail_idx_shadow;
/* How to notify other side. FIXME: commonalize hcalls! */
bool (*notify)(struct virtqueue *vq);
@ -109,7 +115,7 @@ static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
* otherwise virt_to_phys will give us bogus addresses in the
* virtqueue.
*/
gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
gfp &= ~__GFP_HIGHMEM;
desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
if (!desc)
@ -235,13 +241,14 @@ static inline int virtqueue_add(struct virtqueue *_vq,
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
avail = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) & (vq->vring.num - 1);
avail = vq->avail_idx_shadow & (vq->vring.num - 1);
vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
/* Descriptors and available array need to be set before we expose the
* new available array entries. */
virtio_wmb(vq->weak_barriers);
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) + 1);
vq->avail_idx_shadow++;
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
vq->num_added++;
pr_debug("Added buffer head %i to %p\n", head, vq);
@ -354,8 +361,8 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
* event. */
virtio_mb(vq->weak_barriers);
old = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->num_added;
new = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx);
old = vq->avail_idx_shadow - vq->num_added;
new = vq->avail_idx_shadow;
vq->num_added = 0;
#ifdef DEBUG
@ -510,7 +517,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
/* If we expect an interrupt for the next entry, tell host
* by writing event index and flush out the write before
* the read in the next get_buf call. */
if (!(vq->vring.avail->flags & cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT))) {
if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx);
virtio_mb(vq->weak_barriers);
}
@ -537,7 +544,11 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
vq->vring.avail->flags |= cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT);
if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
}
}
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
@ -565,7 +576,10 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
* either clear the flags bit or point the event index at the next
* entry. Always do both to keep code simple. */
vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
}
vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
END_USE(vq);
return last_used_idx;
@ -633,9 +647,12 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
* either clear the flags bit or point the event index at the next
* entry. Always do both to keep code simple. */
vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
}
/* TODO: tune this threshold */
bufs = (u16)(virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->last_used_idx) * 3 / 4;
bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs);
virtio_mb(vq->weak_barriers);
if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
@ -670,7 +687,8 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
/* detach_buf clears data, so grab it now. */
buf = vq->data[i];
detach_buf(vq, i);
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - 1);
vq->avail_idx_shadow--;
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
END_USE(vq);
return buf;
}
@ -735,6 +753,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
vq->weak_barriers = weak_barriers;
vq->broken = false;
vq->last_used_idx = 0;
vq->avail_flags_shadow = 0;
vq->avail_idx_shadow = 0;
vq->num_added = 0;
list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
@ -746,8 +766,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
/* No callback? Tell other side not to bother us. */
if (!callback)
vq->vring.avail->flags |= cpu_to_virtio16(vdev, VRING_AVAIL_F_NO_INTERRUPT);
if (!callback) {
vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
}
/* Put everything in free lists. */
vq->free_head = 0;

View file

@ -110,4 +110,10 @@ static inline void free_page(unsigned long addr)
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
#define list_add_tail(a, b) do {} while (0)
#define list_del(a) do {} while (0)
#define list_for_each_entry(a, b, c) while (0)
/* end of stubs */
#endif /* KERNEL_H */

View file

@ -3,12 +3,6 @@
#include <linux/scatterlist.h>
#include <linux/kernel.h>
/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
#define list_add_tail(a, b) do {} while (0)
#define list_del(a) do {} while (0)
#define list_for_each_entry(a, b, c) while (0)
/* end of stubs */
struct virtio_device {
void *dev;
u64 features;

View file

@ -40,33 +40,39 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
#define virtio_has_feature(dev, feature) \
(__virtio_test_bit((dev), feature))
static inline bool virtio_is_little_endian(struct virtio_device *vdev)
{
return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
virtio_legacy_is_little_endian();
}
/* Memory accessors */
static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
{
return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
return __virtio16_to_cpu(virtio_is_little_endian(vdev), val);
}
static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
{
return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
return __cpu_to_virtio16(virtio_is_little_endian(vdev), val);
}
static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
{
return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
return __virtio32_to_cpu(virtio_is_little_endian(vdev), val);
}
static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
{
return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
return __cpu_to_virtio32(virtio_is_little_endian(vdev), val);
}
static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
{
return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
return __virtio64_to_cpu(virtio_is_little_endian(vdev), val);
}
static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
{
return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
}