From 2b5bbe3b8bee8b38bdc27dd9c0270829b6eb7eeb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:17 -0600 Subject: [PATCH 1/6] virtio_net: skb_orphan() and nf_reset() in xmit path. The complex transmit free logic was introduced to avoid hangs on removing the ip_conntrack module and also because drivers aren't generally supposed to keep stale skbs for unbounded times. After some debate, it was decided that while doing skb_orphan() generally is a rat's nest, we can do it in this driver. Following patches take advantage of this. Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5c498d2b043..dc4c6871897 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -528,8 +528,12 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); - if (err >= 0 && !vi->free_in_tasklet) + if (err >= 0 && !vi->free_in_tasklet) { + /* Don't wait up for transmitted skbs to be freed. */ + skb_orphan(skb); + nf_reset(skb); mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); + } return err; } From 8958f574dbe7e41cc54df0df1accc861bb9f6be8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:18 -0600 Subject: [PATCH 2/6] virtio_net: return NETDEV_TX_BUSY instead of queueing an extra skb. This effectively reverts 99ffc696d10b28580fe93441d627cf290ac4484c "virtio: wean net driver off NETDEV_TX_BUSY". The complexity of queuing an skb (setting a tasklet to re-xmit) is questionable, especially once we get rid of the other reason for the tasklet in the next patch. If the skb won't fit in the tx queue, just return NETDEV_TX_BUSY. This is frowned upon, so a followup patch uses a more complex solution. Signed-off-by: Rusty Russell Cc: Herbert Xu --- drivers/net/virtio_net.c | 46 +++++++++------------------------------- 1 file changed, 10 insertions(+), 36 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index dc4c6871897..222f3d098ae 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -48,9 +48,6 @@ struct virtnet_info struct napi_struct napi; unsigned int status; - /* The skb we couldn't send because buffers were full. */ - struct sk_buff *last_xmit_skb; - /* If we need to free in a timer, this is it. */ struct timer_list xmit_free_timer; @@ -120,9 +117,8 @@ static void skb_xmit_done(struct virtqueue *svq) /* We were probably waiting for more output buffers. */ netif_wake_queue(vi->dev); - /* Make sure we re-xmit last_xmit_skb: if there are no more packets - * queued, start_xmit won't be called. */ - tasklet_schedule(&vi->tasklet); + if (vi->free_in_tasklet) + tasklet_schedule(&vi->tasklet); } static void receive_skb(struct net_device *dev, struct sk_buff *skb, @@ -543,12 +539,7 @@ static void xmit_tasklet(unsigned long data) struct virtnet_info *vi = (void *)data; netif_tx_lock_bh(vi->dev); - if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) >= 0) { - vi->svq->vq_ops->kick(vi->svq); - vi->last_xmit_skb = NULL; - } - if (vi->free_in_tasklet) - free_old_xmit_skbs(vi); + free_old_xmit_skbs(vi); netif_tx_unlock_bh(vi->dev); } @@ -560,28 +551,16 @@ again: /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(vi); - /* If we has a buffer left over from last time, send it now. */ - if (unlikely(vi->last_xmit_skb) && - xmit_skb(vi, vi->last_xmit_skb) < 0) - goto stop_queue; - - vi->last_xmit_skb = NULL; - /* Put new one in send queue and do transmit */ - if (likely(skb)) { - __skb_queue_head(&vi->send, skb); - if (xmit_skb(vi, skb) < 0) { - vi->last_xmit_skb = skb; - skb = NULL; - goto stop_queue; - } + __skb_queue_head(&vi->send, skb); + if (likely(xmit_skb(vi, skb) >= 0)) { + vi->svq->vq_ops->kick(vi->svq); + return NETDEV_TX_OK; } -done: - vi->svq->vq_ops->kick(vi->svq); - return NETDEV_TX_OK; -stop_queue: + /* Ring too full for this packet, remove it from queue again. */ pr_debug("%s: virtio not prepared to send\n", dev->name); + __skb_unlink(skb, &vi->send); netif_stop_queue(dev); /* Activate callback for using skbs: if this returns false it @@ -591,12 +570,7 @@ stop_queue: netif_start_queue(dev); goto again; } - if (skb) { - /* Drop this skb: we only queue one. */ - vi->dev->stats.tx_dropped++; - kfree_skb(skb); - } - goto done; + return NETDEV_TX_BUSY; } static int virtnet_set_mac_address(struct net_device *dev, void *p) From b0c39dbdc204006ef3558a66716ff09797619778 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:19 -0600 Subject: [PATCH 3/6] virtio_net: don't free buffers in xmit ring The virtio_net driver is complicated by the two methods of freeing old xmit buffers (in addition to freeing old ones at the start of the xmit path). The original code used a 1/10 second timer attached to xmit_free(), reset on every xmit. Before we orphaned skbs on xmit, the transmitting userspace could block with a full socket until the timer fired, the skb destructor was called, and they were re-woken. So we added the VIRTIO_F_NOTIFY_ON_EMPTY feature: supporting devices send an interrupt (even if normally suppressed) on an empty xmit ring which makes us schedule xmit_tasklet(). This was a benchmark win. Unfortunately, VIRTIO_F_NOTIFY_ON_EMPTY makes quite a lot of work: a host which is faster than the guest will fire the interrupt every xmit packet (slowing the guest down further). Attempting mitigation in the host adds overhead of userspace timers (possibly with the additional pain of signals), and risks increasing latency anyway if you get it wrong. In practice, this effect was masked by benchmarks which take advantage of GSO (with its inherent transmit batching), but it's still there. Now we orphan xmitted skbs, the pressure is off: remove both paths and no longer request VIRTIO_F_NOTIFY_ON_EMPTY. Note that the current QEMU will notify us even if we don't negotiate this feature (legal, but suboptimal); a patch is outstanding to improve that. Move the skb_orphan/nf_reset to after we've done the send and notified the other end, for a slight optimization. Signed-off-by: Rusty Russell Cc: Mark McLoughlin --- drivers/net/virtio_net.c | 64 ++++------------------------------------ 1 file changed, 5 insertions(+), 59 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 222f3d098ae..3041e4eddb3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -48,16 +48,9 @@ struct virtnet_info struct napi_struct napi; unsigned int status; - /* If we need to free in a timer, this is it. */ - struct timer_list xmit_free_timer; - /* Number of input buffers, and max we've ever had. */ unsigned int num, max; - /* For cleaning up after transmission. */ - struct tasklet_struct tasklet; - bool free_in_tasklet; - /* I like... big packets and I cannot lie! */ bool big_packets; @@ -116,9 +109,6 @@ static void skb_xmit_done(struct virtqueue *svq) /* We were probably waiting for more output buffers. */ netif_wake_queue(vi->dev); - - if (vi->free_in_tasklet) - tasklet_schedule(&vi->tasklet); } static void receive_skb(struct net_device *dev, struct sk_buff *skb, @@ -458,25 +448,9 @@ static void free_old_xmit_skbs(struct virtnet_info *vi) } } -/* If the virtio transport doesn't always notify us when all in-flight packets - * are consumed, we fall back to using this function on a timer to free them. */ -static void xmit_free(unsigned long data) -{ - struct virtnet_info *vi = (void *)data; - - netif_tx_lock(vi->dev); - - free_old_xmit_skbs(vi); - - if (!skb_queue_empty(&vi->send)) - mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); - - netif_tx_unlock(vi->dev); -} - static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { - int num, err; + int num; struct scatterlist sg[2+MAX_SKB_FRAGS]; struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); @@ -522,25 +496,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) sg_set_buf(sg, hdr, sizeof(*hdr)); num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; - - err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); - if (err >= 0 && !vi->free_in_tasklet) { - /* Don't wait up for transmitted skbs to be freed. */ - skb_orphan(skb); - nf_reset(skb); - mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); - } - - return err; -} - -static void xmit_tasklet(unsigned long data) -{ - struct virtnet_info *vi = (void *)data; - - netif_tx_lock_bh(vi->dev); - free_old_xmit_skbs(vi); - netif_tx_unlock_bh(vi->dev); + return vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); } static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -555,6 +511,9 @@ again: __skb_queue_head(&vi->send, skb); if (likely(xmit_skb(vi, skb) >= 0)) { vi->svq->vq_ops->kick(vi->svq); + /* Don't wait up for transmitted skbs to be freed. */ + skb_orphan(skb); + nf_reset(skb); return NETDEV_TX_OK; } @@ -903,10 +862,6 @@ static int virtnet_probe(struct virtio_device *vdev) vi->pages = NULL; INIT_DELAYED_WORK(&vi->refill, refill_work); - /* If they give us a callback when all buffers are done, we don't need - * the timer. */ - vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); - /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) @@ -938,11 +893,6 @@ static int virtnet_probe(struct virtio_device *vdev) skb_queue_head_init(&vi->recv); skb_queue_head_init(&vi->send); - tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi); - - if (!vi->free_in_tasklet) - setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi); - err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); @@ -983,9 +933,6 @@ static void virtnet_remove(struct virtio_device *vdev) /* Stop all the virtqueues. */ vdev->config->reset(vdev); - if (!vi->free_in_tasklet) - del_timer_sync(&vi->xmit_free_timer); - /* Free our skbs in send and recv queues, if any. */ while ((skb = __skb_dequeue(&vi->recv)) != NULL) { kfree_skb(skb); @@ -1019,7 +966,6 @@ static unsigned int features[] = { VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_F_NOTIFY_ON_EMPTY, }; static struct virtio_driver virtio_net = { From b3f24698a7faa6e9d8a14124cfdc25353fc8ca19 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:19 -0600 Subject: [PATCH 4/6] virtio_net: formalize skb_vnet_hdr We put the virtio_net_hdr into the skb's cb region; turn this into a union to clean up the code slightly and allow future expansion. Signed-off-by: Rusty Russell Cc: Mark McLoughlin Cc: Dinesh Subhraveti --- drivers/net/virtio_net.c | 81 ++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3041e4eddb3..420388a4c5e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -68,9 +68,16 @@ struct virtnet_info struct page *pages; }; -static inline void *skb_vnet_hdr(struct sk_buff *skb) +struct skb_vnet_hdr { + union { + struct virtio_net_hdr hdr; + struct virtio_net_hdr_mrg_rxbuf mhdr; + }; +}; + +static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) { - return (struct virtio_net_hdr *)skb->cb; + return (struct skb_vnet_hdr *)skb->cb; } static void give_a_page(struct virtnet_info *vi, struct page *page) @@ -115,7 +122,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { struct virtnet_info *vi = netdev_priv(dev); - struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); int err; int i; @@ -126,7 +133,6 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, } if (vi->mergeable_rx_bufs) { - struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); unsigned int copy; char *p = page_address(skb_shinfo(skb)->frags[0].page); @@ -134,8 +140,8 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, len = PAGE_SIZE; len -= sizeof(struct virtio_net_hdr_mrg_rxbuf); - memcpy(hdr, p, sizeof(*mhdr)); - p += sizeof(*mhdr); + memcpy(&hdr->mhdr, p, sizeof(hdr->mhdr)); + p += sizeof(hdr->mhdr); copy = len; if (copy > skb_tailroom(skb)) @@ -150,13 +156,13 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, skb_shinfo(skb)->nr_frags--; } else { skb_shinfo(skb)->frags[0].page_offset += - sizeof(*mhdr) + copy; + sizeof(hdr->mhdr) + copy; skb_shinfo(skb)->frags[0].size = len; skb->data_len += len; skb->len += len; } - while (--mhdr->num_buffers) { + while (--hdr->mhdr.num_buffers) { struct sk_buff *nskb; i = skb_shinfo(skb)->nr_frags; @@ -170,7 +176,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len); if (!nskb) { pr_debug("%s: rx error: %d buffers missing\n", - dev->name, mhdr->num_buffers); + dev->name, hdr->mhdr.num_buffers); dev->stats.rx_length_errors++; goto drop; } @@ -191,7 +197,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, skb->len += len; } } else { - len -= sizeof(struct virtio_net_hdr); + len -= sizeof(hdr->hdr); if (len <= MAX_PACKET_LEN) trim_pages(vi, skb); @@ -209,9 +215,11 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; - if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { pr_debug("Needs csum!\n"); - if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset)) + if (!skb_partial_csum_set(skb, + hdr->hdr.csum_start, + hdr->hdr.csum_offset)) goto frame_err; } @@ -219,9 +227,9 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); - switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; @@ -234,14 +242,14 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, default: if (net_ratelimit()) printk(KERN_WARNING "%s: bad gso type %u.\n", - dev->name, hdr->gso_type); + dev->name, hdr->hdr.gso_type); goto frame_err; } - if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) + if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - skb_shinfo(skb)->gso_size = hdr->gso_size; + skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; if (skb_shinfo(skb)->gso_size == 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: zero gso size.\n", @@ -272,7 +280,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) sg_init_table(sg, 2+MAX_SKB_FRAGS); for (;;) { - struct virtio_net_hdr *hdr; + struct skb_vnet_hdr *hdr; skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN); if (unlikely(!skb)) { @@ -284,7 +292,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) skb_put(skb, MAX_PACKET_LEN); hdr = skb_vnet_hdr(skb); - sg_set_buf(sg, hdr, sizeof(*hdr)); + sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); if (vi->big_packets) { for (i = 0; i < MAX_SKB_FRAGS; i++) { @@ -452,8 +460,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { int num; struct scatterlist sg[2+MAX_SKB_FRAGS]; - struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); - struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; sg_init_table(sg, 2+MAX_SKB_FRAGS); @@ -461,39 +468,39 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); if (skb->ip_summed == CHECKSUM_PARTIAL) { - hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = skb->csum_start - skb_headroom(skb); - hdr->csum_offset = skb->csum_offset; + hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + hdr->hdr.csum_start = skb->csum_start - skb_headroom(skb); + hdr->hdr.csum_offset = skb->csum_offset; } else { - hdr->flags = 0; - hdr->csum_offset = hdr->csum_start = 0; + hdr->hdr.flags = 0; + hdr->hdr.csum_offset = hdr->hdr.csum_start = 0; } if (skb_is_gso(skb)) { - hdr->hdr_len = skb_headlen(skb); - hdr->gso_size = skb_shinfo(skb)->gso_size; + hdr->hdr.hdr_len = skb_headlen(skb); + hdr->hdr.gso_size = skb_shinfo(skb)->gso_size; if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) - hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN) - hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; + hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else { - hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; - hdr->gso_size = hdr->hdr_len = 0; + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; + hdr->hdr.gso_size = hdr->hdr.hdr_len = 0; } - mhdr->num_buffers = 0; + hdr->mhdr.num_buffers = 0; /* Encode metadata header at front. */ if (vi->mergeable_rx_bufs) - sg_set_buf(sg, mhdr, sizeof(*mhdr)); + sg_set_buf(sg, &hdr->mhdr, sizeof(hdr->mhdr)); else - sg_set_buf(sg, hdr, sizeof(*hdr)); + sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; return vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); From 48925e372f04f5e35fec6269127c62b2c71ab794 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:20 -0600 Subject: [PATCH 5/6] virtio_net: avoid (most) NETDEV_TX_BUSY by stopping queue early. Now we can tell the theoretical capacity remaining in the output queue, virtio_net can waste entries by stopping the queue early. It doesn't work in the case of indirect buffers and kmalloc failure, but that's rare (we could drop the packet in that case, but other drivers return TX_BUSY for similar reasons). For the record, I think this patch reflects poorly on the linux network API. Signed-off-by: Rusty Russell Cc: Dinesh Subhraveti --- drivers/net/virtio_net.c | 62 +++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 420388a4c5e..effe8c685f7 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1,4 +1,4 @@ -/* A simple network driver using virtio. +/* A network driver using virtio. * * Copyright 2007 Rusty Russell IBM Corporation * @@ -73,6 +73,7 @@ struct skb_vnet_hdr { struct virtio_net_hdr hdr; struct virtio_net_hdr_mrg_rxbuf mhdr; }; + unsigned int num_sg; }; static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) @@ -442,23 +443,24 @@ again: return received; } -static void free_old_xmit_skbs(struct virtnet_info *vi) +static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) { struct sk_buff *skb; - unsigned int len; + unsigned int len, tot_sgs = 0; while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); __skb_unlink(skb, &vi->send); vi->dev->stats.tx_bytes += skb->len; vi->dev->stats.tx_packets++; + tot_sgs += skb_vnet_hdr(skb)->num_sg; kfree_skb(skb); } + return tot_sgs; } static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { - int num; struct scatterlist sg[2+MAX_SKB_FRAGS]; struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; @@ -502,13 +504,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) else sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr)); - num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; - return vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); + hdr->num_sg = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; + return vi->svq->vq_ops->add_buf(vi->svq, sg, hdr->num_sg, 0, skb); } static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); + int capacity; again: /* Free up any pending old buffers before queueing new ones. */ @@ -516,27 +519,40 @@ again: /* Put new one in send queue and do transmit */ __skb_queue_head(&vi->send, skb); - if (likely(xmit_skb(vi, skb) >= 0)) { - vi->svq->vq_ops->kick(vi->svq); - /* Don't wait up for transmitted skbs to be freed. */ - skb_orphan(skb); - nf_reset(skb); - return NETDEV_TX_OK; + capacity = xmit_skb(vi, skb); + + /* This can happen with OOM and indirect buffers. */ + if (unlikely(capacity < 0)) { + netif_stop_queue(dev); + dev_warn(&dev->dev, "Unexpected full queue\n"); + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { + vi->svq->vq_ops->disable_cb(vi->svq); + netif_start_queue(dev); + goto again; + } + return NETDEV_TX_BUSY; } - /* Ring too full for this packet, remove it from queue again. */ - pr_debug("%s: virtio not prepared to send\n", dev->name); - __skb_unlink(skb, &vi->send); - netif_stop_queue(dev); + vi->svq->vq_ops->kick(vi->svq); + /* Don't wait up for transmitted skbs to be freed. */ + skb_orphan(skb); + nf_reset(skb); - /* Activate callback for using skbs: if this returns false it - * means some were used in the meantime. */ - if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { - vi->svq->vq_ops->disable_cb(vi->svq); - netif_start_queue(dev); - goto again; + /* Apparently nice girls don't return TX_BUSY; stop the queue + * before it gets out of hand. Naturally, this wastes entries. */ + if (capacity < 2+MAX_SKB_FRAGS) { + netif_stop_queue(dev); + if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) { + /* More just got used, free them then recheck. */ + capacity += free_old_xmit_skbs(vi); + if (capacity >= 2+MAX_SKB_FRAGS) { + netif_start_queue(dev); + vi->svq->vq_ops->disable_cb(vi->svq); + } + } } - return NETDEV_TX_BUSY; + + return NETDEV_TX_OK; } static int virtnet_set_mac_address(struct net_device *dev, void *p) From 0aea51c37fc5868cd723f670af9056c2ef694fee Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Wed, 26 Aug 2009 14:58:28 +0530 Subject: [PATCH 6/6] virtio_net: Check for room in the vq before adding buffer Saves us one cycle of alloc-add-free if the queue was full. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell (modified) --- drivers/net/virtio_net.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index effe8c685f7..d445845f277 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -280,7 +280,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) bool oom = false; sg_init_table(sg, 2+MAX_SKB_FRAGS); - for (;;) { + do { struct skb_vnet_hdr *hdr; skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN); @@ -323,7 +323,7 @@ static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp) break; } vi->num++; - } + } while (err >= num); if (unlikely(vi->num > vi->max)) vi->max = vi->num; vi->rvq->vq_ops->kick(vi->rvq); @@ -341,7 +341,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) if (!vi->mergeable_rx_bufs) return try_fill_recv_maxbufs(vi, gfp); - for (;;) { + do { skb_frag_t *f; skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN); @@ -375,7 +375,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp) break; } vi->num++; - } + } while (err > 0); if (unlikely(vi->num > vi->max)) vi->max = vi->num; vi->rvq->vq_ops->kick(vi->rvq);