From ac16d1484efd5d2d9077c55453b1f8abff49fc18 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Tue, 28 May 2013 17:32:32 +0200 Subject: [PATCH 01/21] batman-adv: wait for rtnl in batadv_store_mesh_iface instead of failing if it is taken The rtnl_lock in batadv_store_mesh_iface has been converted to a rtnl_trylock some time ago to avoid a possible deadlock between rtnl and s_active on removal of the sysfs nodes. The behaviour introduced by that was quite confusing as it could lead to the sysfs store to fail, making batman-adv setup scripts unreliable. As recently the sysfs removal was postponed to a worker not running with the rtnl taken, the deadlock can't occur any more and it is safe to change the trylock back to a lock to make the sysfs store reliable again. Signed-off-by: Matthias Schiffer Reviewed-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/sysfs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 15a22efa9a67..929e304dacb2 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -582,10 +582,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0)) goto out; - if (!rtnl_trylock()) { - ret = -ERESTARTSYS; - goto out; - } + rtnl_lock(); if (status_tmp == BATADV_IF_NOT_IN_USE) { batadv_hardif_disable_interface(hard_iface, From 7c24bbbeab4159f924b65b1e94878e597b762714 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 23 May 2013 13:07:42 +0200 Subject: [PATCH 02/21] batman-adv: forward late OGMs from best next hop When a packet is received from another node first and later from the best next hop, this packet is dropped. However the first OGM was sent with the BATADV_NOT_BEST_NEXT_HOP flag and thus dropped by neighbors. The late OGM from the best neighbor is then dropped because it is a duplicate. If this situation happens constantly, a node might end up not forwarding the "valid" OGMs anymore, and nodes behind will starve from not getting valid OGMs. Fix this by refining the duplicate checking behaviour: The actions should depend on whether it was a duplicate for a neighbor only or for the originator. OGMs which are not duplicates for a specific neighbor will now be considered in batadv_iv_ogm_forward(), but only actually forwarded for the best next hop. Therefore, late OGMs from the best next hop are forwarded now and not dropped as duplicates anymore. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 86 ++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 31 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 071f288b77a8..f680ee101878 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -29,6 +29,21 @@ #include "bat_algo.h" #include "network-coding.h" +/** + * batadv_dup_status - duplicate status + * @BATADV_NO_DUP: the packet is a duplicate + * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the + * neighbor) + * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor + * @BATADV_PROTECTED: originator is currently protected (after reboot) + */ +enum batadv_dup_status { + BATADV_NO_DUP = 0, + BATADV_ORIG_DUP, + BATADV_NEIGH_DUP, + BATADV_PROTECTED, +}; + static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, @@ -650,7 +665,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, const struct batadv_ogm_packet *batadv_ogm_packet, struct batadv_hard_iface *if_incoming, const unsigned char *tt_buff, - int is_duplicate) + enum batadv_dup_status dup_status) { struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; @@ -676,7 +691,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, continue; } - if (is_duplicate) + if (dup_status != BATADV_NO_DUP) continue; spin_lock_bh(&tmp_neigh_node->lq_update_lock); @@ -718,7 +733,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, neigh_node->tq_avg = batadv_ring_buffer_avg(neigh_node->tq_recv); spin_unlock_bh(&neigh_node->lq_update_lock); - if (!is_duplicate) { + if (dup_status == BATADV_NO_DUP) { orig_node->last_ttl = batadv_ogm_packet->header.ttl; neigh_node->last_ttl = batadv_ogm_packet->header.ttl; } @@ -902,15 +917,16 @@ out: return ret; } -/* processes a batman packet for all interfaces, adjusts the sequence number and - * finds out whether it is a duplicate. - * returns: - * 1 the packet is a duplicate - * 0 the packet has not yet been received - * -1 the packet is old and has been received while the seqno window - * was protected. Caller should drop it. +/** + * batadv_iv_ogm_update_seqnos - process a batman packet for all interfaces, + * adjust the sequence number and find out whether it is a duplicate + * @ethhdr: ethernet header of the packet + * @batadv_ogm_packet: OGM packet to be considered + * @if_incoming: interface on which the OGM packet was received + * + * Returns duplicate status as enum batadv_dup_status */ -static int +static enum batadv_dup_status batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, const struct batadv_ogm_packet *batadv_ogm_packet, const struct batadv_hard_iface *if_incoming) @@ -918,17 +934,18 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_orig_node *orig_node; struct batadv_neigh_node *tmp_neigh_node; - int is_duplicate = 0; + int is_dup; int32_t seq_diff; int need_update = 0; - int set_mark, ret = -1; + int set_mark; + enum batadv_dup_status ret = BATADV_NO_DUP; uint32_t seqno = ntohl(batadv_ogm_packet->seqno); uint8_t *neigh_addr; uint8_t packet_count; orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig); if (!orig_node) - return 0; + return BATADV_NO_DUP; spin_lock_bh(&orig_node->ogm_cnt_lock); seq_diff = seqno - orig_node->last_real_seqno; @@ -936,22 +953,29 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, /* signalize caller that the packet is to be dropped. */ if (!hlist_empty(&orig_node->neigh_list) && batadv_window_protected(bat_priv, seq_diff, - &orig_node->batman_seqno_reset)) + &orig_node->batman_seqno_reset)) { + ret = BATADV_PROTECTED; goto out; + } rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { - is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits, - orig_node->last_real_seqno, - seqno); - neigh_addr = tmp_neigh_node->addr; + is_dup = batadv_test_bit(tmp_neigh_node->real_bits, + orig_node->last_real_seqno, + seqno); + if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && - tmp_neigh_node->if_incoming == if_incoming) + tmp_neigh_node->if_incoming == if_incoming) { set_mark = 1; - else + if (is_dup) + ret = BATADV_NEIGH_DUP; + } else { set_mark = 0; + if (is_dup && (ret != BATADV_NEIGH_DUP)) + ret = BATADV_ORIG_DUP; + } /* if the window moved, set the update flag. */ need_update |= batadv_bit_get_packet(bat_priv, @@ -971,8 +995,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, orig_node->last_real_seqno = seqno; } - ret = is_duplicate; - out: spin_unlock_bh(&orig_node->ogm_cnt_lock); batadv_orig_node_free_ref(orig_node); @@ -994,7 +1016,8 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, int is_broadcast = 0, is_bidirect; bool is_single_hop_neigh = false; bool is_from_best_next_hop = false; - int is_duplicate, sameseq, simlar_ttl; + int sameseq, similar_ttl; + enum batadv_dup_status dup_status; uint32_t if_incoming_seqno; uint8_t *prev_sender; @@ -1138,10 +1161,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (!orig_node) return; - is_duplicate = batadv_iv_ogm_update_seqnos(ethhdr, batadv_ogm_packet, - if_incoming); + dup_status = batadv_iv_ogm_update_seqnos(ethhdr, batadv_ogm_packet, + if_incoming); - if (is_duplicate == -1) { + if (dup_status == BATADV_PROTECTED) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: packet within seqno protection time (sender: %pM)\n", ethhdr->h_source); @@ -1211,11 +1234,12 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, * seqno and similar ttl as the non-duplicate */ sameseq = orig_node->last_real_seqno == ntohl(batadv_ogm_packet->seqno); - simlar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl; - if (is_bidirect && (!is_duplicate || (sameseq && simlar_ttl))) + similar_ttl = orig_node->last_ttl - 3 <= batadv_ogm_packet->header.ttl; + if (is_bidirect && ((dup_status == BATADV_NO_DUP) || + (sameseq && similar_ttl))) batadv_iv_ogm_orig_update(bat_priv, orig_node, ethhdr, batadv_ogm_packet, if_incoming, - tt_buff, is_duplicate); + tt_buff, dup_status); /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { @@ -1236,7 +1260,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, goto out_neigh; } - if (is_duplicate) { + if (dup_status == BATADV_NEIGH_DUP) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: duplicate packet received\n"); goto out_neigh; From d5b4c93e67b0b1291aa8e2aaf694e40afc3412d0 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 7 Jun 2013 16:52:05 +0200 Subject: [PATCH 03/21] batman-adv: Don't handle address updates when bla is disabled The bridge loop avoidance has a hook to handle address updates of the originator. These should not be handled when bridge loop avoidance is disabled - it might send some bridge loop avoidance packets which should not appear if bla is disabled. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bridge_loop_avoidance.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 379061c72549..de27b3175cfd 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1067,6 +1067,10 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, group = htons(crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN)); bat_priv->bla.claim_dest.group = group; + /* purge everything when bridge loop avoidance is turned off */ + if (!atomic_read(&bat_priv->bridge_loop_avoidance)) + oldif = NULL; + if (!oldif) { batadv_bla_purge_claims(bat_priv, NULL, 1); batadv_bla_purge_backbone_gw(bat_priv, 1); From 99ffc3e74fb0d9d321d2f19c6021e0dbaff2f4b2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 13 Jun 2013 10:07:29 +0300 Subject: [PATCH 04/21] macvlan: don't touch promisc without passthrough commit df8ef8f3aaa6692970a436204c4429210addb23a "macvlan: add FDB bridge ops and macvlan flags" added a way to control NOPROMISC macvlan flag through netlink. However, with a non passthrough device we never set promisc on open, even if NOPROMISC is off. As a result: If userspace clears NOPROMISC on open, then does not clear it on a netlink command, promisc counter is not decremented on stop and there will be no way to clear it once macvlan is detached. If userspace does not clear NOPROMISC on open, then sets NOPROMISC on a netlink command, promisc counter will be decremented from 0 and overflow to fffffffff with no way to clear promisc. To fix, simply ignore NOPROMISC flag in a netlink command for non-passthrough devices, same as we do at open/close. Since we touch this code anyway - check dev_set_promiscuity return code and pass it to users (though an error here is unlikely). Cc: "David S. Miller" Reviewed-by: John Fastabend Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 1c502bb0c916..6e91931a1c2c 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -853,18 +853,24 @@ static int macvlan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct macvlan_dev *vlan = netdev_priv(dev); - if (data && data[IFLA_MACVLAN_MODE]) - vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]); + if (data && data[IFLA_MACVLAN_FLAGS]) { __u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC; + if (vlan->port->passthru && promisc) { + int err; - if (promisc && (flags & MACVLAN_FLAG_NOPROMISC)) - dev_set_promiscuity(vlan->lowerdev, -1); - else if (promisc && !(flags & MACVLAN_FLAG_NOPROMISC)) - dev_set_promiscuity(vlan->lowerdev, 1); + if (flags & MACVLAN_FLAG_NOPROMISC) + err = dev_set_promiscuity(vlan->lowerdev, -1); + else + err = dev_set_promiscuity(vlan->lowerdev, 1); + if (err < 0) + return err; + } vlan->flags = flags; } + if (data && data[IFLA_MACVLAN_MODE]) + vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]); return 0; } From 94f950c4060cd9b1989c565284beb159b9705a50 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 11 Jun 2013 11:00:34 +0100 Subject: [PATCH 05/21] xen-netback: don't de-reference vif pointer after having called xenvif_put() When putting vif-s on the rx notify list, calling xenvif_put() must be deferred until after the removal from the list and the issuing of the notification, as both operations dereference the pointer. Changing this got me to notice that the "irq" variable was effectively unused (and was of too narrow type anyway). Signed-off-by: Jan Beulich Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 37984e6d4e99..8c20935d72c9 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -662,7 +662,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) { struct xenvif *vif = NULL, *tmp; s8 status; - u16 irq, flags; + u16 flags; struct xen_netif_rx_response *resp; struct sk_buff_head rxq; struct sk_buff *skb; @@ -771,13 +771,13 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) sco->meta_slots_used); RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); - irq = vif->irq; - if (ret && list_empty(&vif->notify_list)) - list_add_tail(&vif->notify_list, ¬ify); xenvif_notify_tx_completion(vif); - xenvif_put(vif); + if (ret && list_empty(&vif->notify_list)) + list_add_tail(&vif->notify_list, ¬ify); + else + xenvif_put(vif); npo.meta_cons += sco->meta_slots_used; dev_kfree_skb(skb); } @@ -785,6 +785,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) list_for_each_entry_safe(vif, tmp, ¬ify, notify_list) { notify_remote_via_irq(vif->irq); list_del_init(&vif->notify_list); + xenvif_put(vif); } /* More work to do? */ From 0c5fed09ab0feedd43c362b1c7fff67fdbf9548f Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Tue, 11 Jun 2013 17:18:22 +0530 Subject: [PATCH 06/21] be2net: Fix 32-bit DMA Mask handling Fix to set the coherent DMA mask only if dma_set_mask() succeeded, and to error out if either fails. Signed-off-by: Somnath Kotur Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 8bc1b21b1c79..a0b4be51f0d1 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4262,6 +4262,9 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) netdev->features |= NETIF_F_HIGHDMA; } else { status = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (!status) + status = dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(32)); if (status) { dev_err(&pdev->dev, "Could not set PCI DMA Mask\n"); goto free_netdev; From 631f24a2febb228f82604dc5330091e8080cd8ae Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Wed, 12 Jun 2013 11:05:03 -0500 Subject: [PATCH 07/21] net: ethernet: stmicro: stmmac: Fix compile error when STMMAC_XMIT_DEBUG used drivers/net/ethernet/stmicro/stmmac/stmmac_main.c: In function: stmmac_xmit drivers/net/ethernet/stmicro/stmmac/stmmac_main.c:1902:74: error: expected ) before __func__ Signed-off-by: Dinh Nguyen Cc: Giuseppe Cavallaro CC: David S. Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 618446ae1ec1..ee919ca8b8a0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1899,7 +1899,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef STMMAC_XMIT_DEBUG if (netif_msg_pktdata(priv)) { - pr_info("%s: curr %d dirty=%d entry=%d, first=%p, nfrags=%d" + pr_info("%s: curr %d dirty=%d entry=%d, first=%p, nfrags=%d", __func__, (priv->cur_tx % txsize), (priv->dirty_tx % txsize), entry, first, nfrags); if (priv->extend_desc) From 2dc85bf323515e59e15dfa858d1472bb25cad0fe Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 12 Jun 2013 16:02:27 +0200 Subject: [PATCH 08/21] packet: packet_getname_spkt: make sure string is always 0-terminated uaddr->sa_data is exactly of size 14, which is hard-coded here and passed as a size argument to strncpy(). A device name can be of size IFNAMSIZ (== 16), meaning we might leave the destination string unterminated. Thus, use strlcpy() and also sizeof() while we're at it. We need to memset the data area beforehand, since strlcpy does not padd the remaining buffer with zeroes for user space, so that we do not possibly leak anything. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/packet/af_packet.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8ec1bca7f859..20a1bd0e6549 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2851,12 +2851,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); if (dev) - strncpy(uaddr->sa_data, dev->name, 14); - else - memset(uaddr->sa_data, 0, 14); + strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); *uaddr_len = sizeof(*uaddr); From b8fad459f9cc8417b74f71c6c229eef7412163d1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 12 Jun 2013 00:07:01 +0200 Subject: [PATCH 09/21] bonding: reset master mac on first enslave failure If the bond device is supposed to get the first slave's MAC address and the first enslavement fails then we need to reset the master's MAC otherwise it will stay the same as the failed slave device. We do it after err_undo_flags since that is the first place where the MAC can be changed and we check if it should've been the first slave and if the bond's MAC was set to it because that err place is used by multiple locations prior to changing the master's MAC address. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 29b846cbfb48..473633ab5f56 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1957,6 +1957,10 @@ err_free: err_undo_flags: bond_compute_features(bond); + /* Enslave of first slave has failed and we need to fix master's mac */ + if (bond->slave_cnt == 0 && + ether_addr_equal(bond_dev->dev_addr, slave_dev->dev_addr)) + eth_hw_addr_random(bond_dev); return res; } From 4f5474e7fd68988cb11373fc698bf10b35b49e31 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 12 Jun 2013 00:07:02 +0200 Subject: [PATCH 10/21] bonding: fix igmp_retrans type and two related races First the type of igmp_retrans (which is the actual counter of igmp_resend parameter) is changed to u8 to be able to store values up to 255 (as per documentation). There are two races that were hidden there and which are easy to trigger after the previous fix, the first is between bond_resend_igmp_join_requests and bond_change_active_slave where igmp_retrans is set and can be altered by the periodic. The second race condition is between multiple running instances of the periodic (upon execution it can be scheduled again for immediate execution which can cause the counter to go < 0 which in the unsigned case leads to unnecessary igmp retransmissions). Since in bond_change_active_slave bond->lock is held for reading and curr_slave_lock for writing, we use curr_slave_lock for mutual exclusion. We can't drop them as there're cases where RTNL is not held when bond_change_active_slave is called. RCU is unlocked in bond_resend_igmp_join_requests before getting curr_slave_lock since we don't need it there and it's pointless to delay. The decrement is moved inside the "if" block because if we decrement unconditionally there's still a possibility for a race condition although it is much more difficult to hit (many changes have to happen in a very short period in order to trigger) which in the case of 3 parallel running instances of this function and igmp_retrans == 1 (with check bond->igmp_retrans-- > 1) is: f1 passes, doesn't re-schedule, but decrements - igmp_retrans = 0 f2 then passes, doesn't re-schedule, but decrements - igmp_retrans = 255 f3 does the unnecessary retransmissions. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 19 +++++++++++++------ drivers/net/bonding/bonding.h | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 473633ab5f56..02d9ae7d527e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -764,8 +764,8 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) struct net_device *bond_dev, *vlan_dev, *upper_dev; struct vlan_entry *vlan; - rcu_read_lock(); read_lock(&bond->lock); + rcu_read_lock(); bond_dev = bond->dev; @@ -787,12 +787,19 @@ static void bond_resend_igmp_join_requests(struct bonding *bond) if (vlan_dev) __bond_resend_igmp_join_requests(vlan_dev); } - - if (--bond->igmp_retrans > 0) - queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); - - read_unlock(&bond->lock); rcu_read_unlock(); + + /* We use curr_slave_lock to protect against concurrent access to + * igmp_retrans from multiple running instances of this function and + * bond_change_active_slave + */ + write_lock_bh(&bond->curr_slave_lock); + if (bond->igmp_retrans > 1) { + bond->igmp_retrans--; + queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); + } + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); } static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 2baec24388b1..f989e1529a29 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -225,7 +225,7 @@ struct bonding { rwlock_t curr_slave_lock; u8 send_peer_notif; s8 setup_by_slave; - s8 igmp_retrans; + u8 igmp_retrans; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; char proc_file_name[IFNAMSIZ]; From 55b92b7a11690bc377b5d373872a6b650ae88e64 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 12 Jun 2013 16:07:23 +0200 Subject: [PATCH 11/21] l2tp: Fix PPP header erasure and memory leak Copy user data after PPP framing header. This prevents erasure of the added PPP header and avoids leaking two bytes of uninitialised memory at the end of skb's data buffer. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 637a341c1e2d..681c626068b4 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -346,12 +346,12 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh skb_put(skb, 2); /* Copy user data into skb */ - error = memcpy_fromiovec(skb->data, m->msg_iov, total_len); + error = memcpy_fromiovec(skb_put(skb, total_len), m->msg_iov, + total_len); if (error < 0) { kfree_skb(skb); goto error_put_sess_tun; } - skb_put(skb, total_len); l2tp_xmit_skb(session, skb, session->hdr_len); From a6f79d0f26704214b5b702bbac525cb72997f984 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 12 Jun 2013 16:07:36 +0200 Subject: [PATCH 12/21] l2tp: Fix sendmsg() return value PPPoL2TP sockets should comply with the standard send*() return values (i.e. return number of bytes sent instead of 0 upon success). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 681c626068b4..8dec6876dc50 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -358,7 +358,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh sock_put(ps->tunnel_sock); sock_put(sk); - return error; + return total_len; error_put_sess_tun: sock_put(ps->tunnel_sock); From df465abfe06f7dc4f33f4a96d17f096e9e8ac917 Mon Sep 17 00:00:00 2001 From: Nithin Sujir Date: Wed, 12 Jun 2013 11:08:59 -0700 Subject: [PATCH 13/21] tg3: Wait for boot code to finish after power on Some systems that don't need wake-on-lan may choose to power down the chip on system standby. Upon resume, the power on causes the boot code to startup and initialize the hardware. On one new platform, this is causing the device to go into a bad state due to a race between the driver and boot code, once every several hundred resumes. The same race exists on open since we come up from a power on. This patch adds a wait for boot code signature at the beginning of tg3_init_hw() which is common to both cases. If there has not been a power-off or the boot code has already completed, the signature will be present and poll_fw() returns immediately. Also return immediately if the device does not have firmware. Cc: stable@vger.kernel.org Signed-off-by: Nithin Nayak Sujir Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 0f493c8dc28b..c777b9013164 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -1800,6 +1800,9 @@ static int tg3_poll_fw(struct tg3 *tp) int i; u32 val; + if (tg3_flag(tp, NO_FWARE_REPORTED)) + return 0; + if (tg3_flag(tp, IS_SSB_CORE)) { /* We don't use firmware. */ return 0; @@ -10404,6 +10407,13 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy) */ static int tg3_init_hw(struct tg3 *tp, bool reset_phy) { + /* Chip may have been just powered on. If so, the boot code may still + * be running initialization. Wait for it to finish to avoid races in + * accessing the hardware. + */ + tg3_enable_register_access(tp); + tg3_poll_fw(tp); + tg3_switch_clocks(tp); tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0); From baafc77b32f647daa7c45825f7af8cdd55d00817 Mon Sep 17 00:00:00 2001 From: Saurabh Mohan Date: Mon, 10 Jun 2013 17:45:10 -0700 Subject: [PATCH 14/21] net/ipv4: ip_vti clear skb cb before tunneling. If users apply shaper to vti tunnel then it will cause a kernel crash. The problem seems to be due to the vti_tunnel_xmit function not clearing skb->opt field before passing the packet to xfrm tunneling code. Signed-off-by: Saurabh Mohan Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_vti.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 9d2bdb2c1d3f..c118f6b576bb 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -361,8 +361,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) tunnel->err_count = 0; } - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); nf_reset(skb); From 5033ec3e3f923a371c28f0c3df45905a9dd9c457 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Tue, 11 Jun 2013 15:32:04 +0530 Subject: [PATCH 15/21] drivers: net: davinci_mdio: moving mdio resume earlier than cpsw ethernet driver MDIO driver should resume before CPSW ethernet driver so that CPSW connect to the phy and start tx/rx ethernet packets, changing the suspend/resume apis with suspend_late/resume_early. Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_mdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index b2275d1b19b3..74e56b3fba11 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -476,8 +476,8 @@ static int davinci_mdio_resume(struct device *dev) } static const struct dev_pm_ops davinci_mdio_pm_ops = { - .suspend = davinci_mdio_suspend, - .resume = davinci_mdio_resume, + .suspend_late = davinci_mdio_suspend, + .resume_early = davinci_mdio_resume, }; static const struct of_device_id davinci_mdio_of_mtable[] = { From cc60ab0a8b5b62ea6b5cc1c6397adb5b4bd41271 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Tue, 11 Jun 2013 15:32:05 +0530 Subject: [PATCH 16/21] drivers: net: davinci_mdio: restore mdio clk divider in mdio resume During suspend resume cycle all the register data is lost, so MDIO clock divier value gets reset. This patch restores the clock divider value. Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_mdio.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 74e56b3fba11..c47f0dbcebb5 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -459,15 +459,12 @@ static int davinci_mdio_suspend(struct device *dev) static int davinci_mdio_resume(struct device *dev) { struct davinci_mdio_data *data = dev_get_drvdata(dev); - u32 ctrl; pm_runtime_get_sync(data->dev); spin_lock(&data->lock); /* restart the scan state machine */ - ctrl = __raw_readl(&data->regs->control); - ctrl |= CONTROL_ENABLE; - __raw_writel(ctrl, &data->regs->control); + __davinci_mdio_reset(data); data->suspended = false; spin_unlock(&data->lock); From d3b6f6141831b6e2d414edea6cc7af5b9bc6fac2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Jun 2013 13:26:05 -0700 Subject: [PATCH 17/21] ip_tunnel: remove __net_init/exit from exported functions If CONFIG_NET_NS is not set then __net_init is the same as __init and __net_exit is the same as __exit. These functions will be removed from memory after the module loads or is removed. Functions that are exported for use by other functions should never be labeled for removal. Bug introduced by commit c54419321455631079c ("GRE: Refactor GRE tunneling code.") Reported-by: Steinar H. Gunderson Signed-off-by: Steven Rostedt Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 6 +++--- net/ipv4/ip_tunnel.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 4b6f0b28f41f..09b1360e10bf 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -95,10 +95,10 @@ struct ip_tunnel_net { int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); -int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, - struct rtnl_link_ops *ops, char *devname); +int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname); -void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn); +void ip_tunnel_delete_net(struct ip_tunnel_net *itn); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index be2f8da0ae8e..7fa8f08fa7ae 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -853,7 +853,7 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL_GPL(ip_tunnel_dellink); -int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, +int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); @@ -899,7 +899,7 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) unregister_netdevice_queue(itn->fb_tunnel_dev, head); } -void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn) +void ip_tunnel_delete_net(struct ip_tunnel_net *itn) { LIST_HEAD(list); From dd019897358b815f7828dab90b51d51df4d3658d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 13 Jun 2013 10:15:45 +0900 Subject: [PATCH 18/21] net: sh_eth: fix incorrect RX length error if R8A7740 This patch fixes an issue that the driver increments the "RX length error" on every buffer in sh_eth_rx() if the R8A7740. This patch also adds a description about the Receive Frame Status bits. Signed-off-by: Yoshihiro Shimoda Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index b4479b5aaee4..5e3982fc5398 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1401,16 +1401,23 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status) desc_status = edmac_to_cpu(mdp, rxdesc->status); pkt_len = rxdesc->frame_length; -#if defined(CONFIG_ARCH_R8A7740) - desc_status >>= 16; -#endif - if (--boguscnt < 0) break; if (!(desc_status & RDFEND)) ndev->stats.rx_length_errors++; +#if defined(CONFIG_ARCH_R8A7740) + /* + * In case of almost all GETHER/ETHERs, the Receive Frame State + * (RFS) bits in the Receive Descriptor 0 are from bit 9 to + * bit 0. However, in case of the R8A7740's GETHER, the RFS + * bits are from bit 25 to bit 16. So, the driver needs right + * shifting by 16. + */ + desc_status >>= 16; +#endif + if (desc_status & (RD_RFS1 | RD_RFS2 | RD_RFS3 | RD_RFS4 | RD_RFS5 | RD_RFS6 | RD_RFS10)) { ndev->stats.rx_errors++; From c9bfbb31af7c8428267b34eb9706a621ac219a28 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 13 Jun 2013 15:31:28 -0400 Subject: [PATCH 19/21] tulip: Properly check dma mapping result Tulip throws an error when dma debugging is enabled, as it doesn't properly check dma mapping results with dma_mapping_error() durring tx ring refills. Easy fix, just add it in, and drop the frame if the mapping is bad Signed-off-by: Neil Horman CC: Grant Grundler CC: "David S. Miller" Reviewed-by: Grant Grundler Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/interrupt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/dec/tulip/interrupt.c b/drivers/net/ethernet/dec/tulip/interrupt.c index 28a5e425fecf..92306b320840 100644 --- a/drivers/net/ethernet/dec/tulip/interrupt.c +++ b/drivers/net/ethernet/dec/tulip/interrupt.c @@ -76,6 +76,12 @@ int tulip_refill_rx(struct net_device *dev) mapping = pci_map_single(tp->pdev, skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&tp->pdev->dev, mapping)) { + dev_kfree_skb(skb); + tp->rx_buffers[entry].skb = NULL; + break; + } + tp->rx_buffers[entry].mapping = mapping; tp->rx_ring[entry].buffer1 = cpu_to_le32(mapping); From aaf9522d62d18626a60f7f2080671d853d9e8681 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 13 Jun 2013 09:09:47 -0400 Subject: [PATCH 20/21] netiucv: Hold rtnl between name allocation and device registration. fixes a race condition between concurrent initializations of netiucv devices that try to use the same name. sysfs: cannot create duplicate filename '/devices/iucv/netiucv2' [...] Call Trace: ([<00000000002edea4>] sysfs_add_one+0xb0/0xdc) [<00000000002eecd4>] create_dir+0x80/0xfc [<00000000002eee38>] sysfs_create_dir+0xe8/0x118 [<00000000003835a8>] kobject_add_internal+0x120/0x2d0 [<00000000003839d6>] kobject_add+0x62/0x9c [<00000000003d9564>] device_add+0xcc/0x510 [<000003e00212c7b4>] netiucv_register_device+0xc0/0x1ec [netiucv] Signed-off-by: Benjamin Poirier Tested-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/netiucv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 4ffa66c87ea5..9ca3996f65b2 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -2040,6 +2040,7 @@ static struct net_device *netiucv_init_netdevice(char *username, char *userdata) netiucv_setup_netdevice); if (!dev) return NULL; + rtnl_lock(); if (dev_alloc_name(dev, dev->name) < 0) goto out_netdev; @@ -2061,6 +2062,7 @@ static struct net_device *netiucv_init_netdevice(char *username, char *userdata) out_fsm: kfree_fsm(privptr->fsm); out_netdev: + rtnl_unlock(); free_netdev(dev); return NULL; } @@ -2100,6 +2102,7 @@ static ssize_t conn_write(struct device_driver *drv, rc = netiucv_register_device(dev); if (rc) { + rtnl_unlock(); IUCV_DBF_TEXT_(setup, 2, "ret %d from netiucv_register_device\n", rc); goto out_free_ndev; @@ -2109,7 +2112,8 @@ static ssize_t conn_write(struct device_driver *drv, priv = netdev_priv(dev); SET_NETDEV_DEV(dev, priv->dev); - rc = register_netdev(dev); + rc = register_netdevice(dev); + rtnl_unlock(); if (rc) goto out_unreg; From c5c7774d7eb4397891edca9ebdf750ba90977a69 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 12 Jun 2013 14:26:44 -0400 Subject: [PATCH 21/21] sctp: fully initialize sctp_outq in sctp_outq_init In commit 2f94aabd9f6c925d77aecb3ff020f1cc12ed8f86 (refactor sctp_outq_teardown to insure proper re-initalization) we modified sctp_outq_teardown to use sctp_outq_init to fully re-initalize the outq structure. Steve West recently asked me why I removed the q->error = 0 initalization from sctp_outq_teardown. I did so because I was operating under the impression that sctp_outq_init would properly initalize that value for us, but it doesn't. sctp_outq_init operates under the assumption that the outq struct is all 0's (as it is when called from sctp_association_init), but using it in __sctp_outq_teardown violates that assumption. We should do a memset in sctp_outq_init to ensure that the entire structure is in a known state there instead. Signed-off-by: Neil Horman Reported-by: "West, Steve (NSN - US/Fort Worth)" CC: Vlad Yasevich CC: netdev@vger.kernel.org CC: davem@davemloft.net Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 32a4625fef77..be35e2dbcc9a 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -206,6 +206,8 @@ static inline int sctp_cacc_skip(struct sctp_transport *primary, */ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) { + memset(q, 0, sizeof(struct sctp_outq)); + q->asoc = asoc; INIT_LIST_HEAD(&q->out_chunk_list); INIT_LIST_HEAD(&q->control_chunk_list); @@ -213,11 +215,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); - q->fast_rtx = 0; - q->outstanding_bytes = 0; q->empty = 1; - q->cork = 0; - q->out_qlen = 0; } /* Free the outqueue structure and any related pending chunks.