diff --git a/target/linux/generic/patches-4.0/072-01-bgmac-fix-descriptor-frame-start-end-definitions.patch b/target/linux/generic/patches-4.0/072-01-bgmac-fix-descriptor-frame-start-end-definitions.patch
new file mode 100644
index 0000000000000000000000000000000000000000..fdfae3aeff904ca1321ee8bae6429c653c4d5db5
--- /dev/null
+++ b/target/linux/generic/patches-4.0/072-01-bgmac-fix-descriptor-frame-start-end-definitions.patch
@@ -0,0 +1,24 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Mon, 23 Mar 2015 02:40:06 +0100
+Subject: [PATCH] bgmac: fix descriptor frame start/end definitions
+
+The start-of-frame and end-of-frame bits were accidentally swapped.
+In the current code it does not make any difference, since they are
+always used together.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -345,8 +345,8 @@
+ 
+ #define BGMAC_DESC_CTL0_EOT			0x10000000	/* End of ring */
+ #define BGMAC_DESC_CTL0_IOC			0x20000000	/* IRQ on complete */
+-#define BGMAC_DESC_CTL0_SOF			0x40000000	/* Start of frame */
+-#define BGMAC_DESC_CTL0_EOF			0x80000000	/* End of frame */
++#define BGMAC_DESC_CTL0_EOF			0x40000000	/* End of frame */
++#define BGMAC_DESC_CTL0_SOF			0x80000000	/* Start of frame */
+ #define BGMAC_DESC_CTL1_LEN			0x00001FFF
+ 
+ #define BGMAC_PHY_NOREGS			0x1E
diff --git a/target/linux/generic/patches-4.0/072-02-bgmac-implement-GRO-and-use-build_skb.patch b/target/linux/generic/patches-4.0/072-02-bgmac-implement-GRO-and-use-build_skb.patch
new file mode 100644
index 0000000000000000000000000000000000000000..2a2df608b9a8ee5e5c07e79d87f984eff60c0f76
--- /dev/null
+++ b/target/linux/generic/patches-4.0/072-02-bgmac-implement-GRO-and-use-build_skb.patch
@@ -0,0 +1,189 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Mon, 23 Mar 2015 02:41:25 +0100
+Subject: [PATCH] bgmac: implement GRO and use build_skb
+
+This improves performance for routing and local rx
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -276,31 +276,31 @@ static int bgmac_dma_rx_skb_for_slot(str
+ 				     struct bgmac_slot_info *slot)
+ {
+ 	struct device *dma_dev = bgmac->core->dma_dev;
+-	struct sk_buff *skb;
+ 	dma_addr_t dma_addr;
+ 	struct bgmac_rx_header *rx;
++	void *buf;
+ 
+ 	/* Alloc skb */
+-	skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE);
+-	if (!skb)
++	buf = netdev_alloc_frag(BGMAC_RX_ALLOC_SIZE);
++	if (!buf)
+ 		return -ENOMEM;
+ 
+ 	/* Poison - if everything goes fine, hardware will overwrite it */
+-	rx = (struct bgmac_rx_header *)skb->data;
++	rx = buf;
+ 	rx->len = cpu_to_le16(0xdead);
+ 	rx->flags = cpu_to_le16(0xbeef);
+ 
+ 	/* Map skb for the DMA */
+-	dma_addr = dma_map_single(dma_dev, skb->data,
+-				  BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
++	dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE,
++				  DMA_FROM_DEVICE);
+ 	if (dma_mapping_error(dma_dev, dma_addr)) {
+ 		bgmac_err(bgmac, "DMA mapping error\n");
+-		dev_kfree_skb(skb);
++		put_page(virt_to_head_page(buf));
+ 		return -ENOMEM;
+ 	}
+ 
+ 	/* Update the slot */
+-	slot->skb = skb;
++	slot->buf = buf;
+ 	slot->dma_addr = dma_addr;
+ 
+ 	return 0;
+@@ -343,8 +343,9 @@ static int bgmac_dma_rx_read(struct bgma
+ 	while (ring->start != ring->end) {
+ 		struct device *dma_dev = bgmac->core->dma_dev;
+ 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+-		struct sk_buff *skb = slot->skb;
+-		struct bgmac_rx_header *rx;
++		struct bgmac_rx_header *rx = slot->buf;
++		struct sk_buff *skb;
++		void *buf = slot->buf;
+ 		u16 len, flags;
+ 
+ 		/* Unmap buffer to make it accessible to the CPU */
+@@ -352,7 +353,6 @@ static int bgmac_dma_rx_read(struct bgma
+ 					BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+ 
+ 		/* Get info from the header */
+-		rx = (struct bgmac_rx_header *)skb->data;
+ 		len = le16_to_cpu(rx->len);
+ 		flags = le16_to_cpu(rx->flags);
+ 
+@@ -393,12 +393,13 @@ static int bgmac_dma_rx_read(struct bgma
+ 			dma_unmap_single(dma_dev, old_dma_addr,
+ 					 BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+ 
++			skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
+ 			skb_put(skb, BGMAC_RX_FRAME_OFFSET + len);
+ 			skb_pull(skb, BGMAC_RX_FRAME_OFFSET);
+ 
+ 			skb_checksum_none_assert(skb);
+ 			skb->protocol = eth_type_trans(skb, bgmac->net_dev);
+-			netif_receive_skb(skb);
++			napi_gro_receive(&bgmac->napi, skb);
+ 			handled++;
+ 		} while (0);
+ 
+@@ -434,12 +435,11 @@ static bool bgmac_dma_unaligned(struct b
+ 	return false;
+ }
+ 
+-static void bgmac_dma_ring_free(struct bgmac *bgmac,
+-				struct bgmac_dma_ring *ring)
++static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
++				   struct bgmac_dma_ring *ring)
+ {
+ 	struct device *dma_dev = bgmac->core->dma_dev;
+ 	struct bgmac_slot_info *slot;
+-	int size;
+ 	int i;
+ 
+ 	for (i = 0; i < ring->num_slots; i++) {
+@@ -451,23 +451,55 @@ static void bgmac_dma_ring_free(struct b
+ 			dev_kfree_skb(slot->skb);
+ 		}
+ 	}
++}
++
++static void bgmac_dma_rx_ring_free(struct bgmac *bgmac,
++				   struct bgmac_dma_ring *ring)
++{
++	struct device *dma_dev = bgmac->core->dma_dev;
++	struct bgmac_slot_info *slot;
++	int i;
++
++	for (i = 0; i < ring->num_slots; i++) {
++		slot = &ring->slots[i];
++		if (!slot->buf)
++			continue;
+ 
+-	if (ring->cpu_base) {
+-		/* Free ring of descriptors */
+-		size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+-		dma_free_coherent(dma_dev, size, ring->cpu_base,
+-				  ring->dma_base);
++		if (slot->dma_addr)
++			dma_unmap_single(dma_dev, slot->dma_addr,
++					 BGMAC_RX_BUF_SIZE,
++					 DMA_FROM_DEVICE);
++		put_page(virt_to_head_page(slot->buf));
+ 	}
+ }
+ 
+/* Release the coherent DMA memory that holds a ring's descriptor table. */
+static void bgmac_dma_ring_desc_free(struct bgmac *bgmac,
+				     struct bgmac_dma_ring *ring)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	int size;
+
+	/* Nothing to release when the ring has no descriptor table */
+	if (!ring->cpu_base)
+		return;
+
+	size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+	dma_free_coherent(dma_dev, size, ring->cpu_base, ring->dma_base);
+}
++
+ static void bgmac_dma_free(struct bgmac *bgmac)
+ {
+ 	int i;
+ 
+-	for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
+-		bgmac_dma_ring_free(bgmac, &bgmac->tx_ring[i]);
+-	for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
+-		bgmac_dma_ring_free(bgmac, &bgmac->rx_ring[i]);
++	for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
++		bgmac_dma_tx_ring_free(bgmac, &bgmac->tx_ring[i]);
++		bgmac_dma_ring_desc_free(bgmac, &bgmac->tx_ring[i]);
++	}
++	for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
++		bgmac_dma_rx_ring_free(bgmac, &bgmac->rx_ring[i]);
++		bgmac_dma_ring_desc_free(bgmac, &bgmac->rx_ring[i]);
++	}
+ }
+ 
+ static int bgmac_dma_alloc(struct bgmac *bgmac)
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -362,6 +362,8 @@
+ #define BGMAC_RX_FRAME_OFFSET			30		/* There are 2 unused bytes between header and real data */
+ #define BGMAC_RX_MAX_FRAME_SIZE			1536		/* Copied from b44/tg3 */
+ #define BGMAC_RX_BUF_SIZE			(BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
++#define BGMAC_RX_ALLOC_SIZE			(SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE) + \
++						 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+ 
+ #define BGMAC_BFL_ENETROBO			0x0010		/* has ephy roboswitch spi */
+ #define BGMAC_BFL_ENETADM			0x0080		/* has ADMtek switch */
+@@ -383,7 +385,10 @@
+ #define ETHER_MAX_LEN   1518
+ 
+ struct bgmac_slot_info {
+-	struct sk_buff *skb;
++	union {
++		struct sk_buff *skb;
++		void *buf;
++	};
+ 	dma_addr_t dma_addr;
+ };
+ 
diff --git a/target/linux/generic/patches-4.0/072-03-bgmac-implement-scatter-gather-support.patch b/target/linux/generic/patches-4.0/072-03-bgmac-implement-scatter-gather-support.patch
new file mode 100644
index 0000000000000000000000000000000000000000..5cb21a565aaffa5634ce0107fbea9f9f9d1f2b2b
--- /dev/null
+++ b/target/linux/generic/patches-4.0/072-03-bgmac-implement-scatter-gather-support.patch
@@ -0,0 +1,267 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Mon, 23 Mar 2015 02:42:26 +0100
+Subject: [PATCH] bgmac: implement scatter/gather support
+
+Always use software checksumming, since the hardware does not have any
+checksum offload support.
+This significantly improves local TCP tx performance.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
+ 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
+ }
+ 
++static void
++bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
++		     int i, int len, u32 ctl0)
++{
++	struct bgmac_slot_info *slot;
++	struct bgmac_dma_desc *dma_desc;
++	u32 ctl1;
++
++	if (i == ring->num_slots - 1)
++		ctl0 |= BGMAC_DESC_CTL0_EOT;
++
++	ctl1 = len & BGMAC_DESC_CTL1_LEN;
++
++	slot = &ring->slots[i];
++	dma_desc = &ring->cpu_base[i];
++	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
++	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
++	dma_desc->ctl0 = cpu_to_le32(ctl0);
++	dma_desc->ctl1 = cpu_to_le32(ctl1);
++}
++
+ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
+ 				    struct bgmac_dma_ring *ring,
+ 				    struct sk_buff *skb)
+ {
+ 	struct device *dma_dev = bgmac->core->dma_dev;
+ 	struct net_device *net_dev = bgmac->net_dev;
+-	struct bgmac_dma_desc *dma_desc;
+-	struct bgmac_slot_info *slot;
+-	u32 ctl0, ctl1;
++	struct bgmac_slot_info *slot = &ring->slots[ring->end];
+ 	int free_slots;
++	int nr_frags;
++	u32 flags;
++	int index = ring->end;
++	int i;
+ 
+ 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
+ 		bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
+-		goto err_stop_drop;
++		goto err_drop;
+ 	}
+ 
++	if (skb->ip_summed == CHECKSUM_PARTIAL)
++		skb_checksum_help(skb);
++
++	nr_frags = skb_shinfo(skb)->nr_frags;
++
+ 	if (ring->start <= ring->end)
+ 		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+ 	else
+ 		free_slots = ring->start - ring->end;
+-	if (free_slots == 1) {
++
++	if (free_slots <= nr_frags + 1) {
+ 		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+ 		netif_stop_queue(net_dev);
+ 		return NETDEV_TX_BUSY;
+ 	}
+ 
+-	slot = &ring->slots[ring->end];
+-	slot->skb = skb;
+-	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
++	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
+ 					DMA_TO_DEVICE);
+-	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+-		bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+-			  ring->mmio_base);
+-		goto err_stop_drop;
+-	}
++	if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++		goto err_dma_head;
+ 
+-	ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
+-	if (ring->end == ring->num_slots - 1)
+-		ctl0 |= BGMAC_DESC_CTL0_EOT;
+-	ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
++	flags = BGMAC_DESC_CTL0_SOF;
++	if (!nr_frags)
++		flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++	bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
++	flags = 0;
++
++	for (i = 0; i < nr_frags; i++) {
++		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
++		int len = skb_frag_size(frag);
++
++		index = (index + 1) % BGMAC_TX_RING_SLOTS;
++		slot = &ring->slots[index];
++		slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
++						  len, DMA_TO_DEVICE);
++		if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++			goto err_dma;
+ 
+-	dma_desc = ring->cpu_base;
+-	dma_desc += ring->end;
+-	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+-	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+-	dma_desc->ctl0 = cpu_to_le32(ctl0);
+-	dma_desc->ctl1 = cpu_to_le32(ctl1);
++		if (i == nr_frags - 1)
++			flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++		bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
++	}
++
++	slot->skb = skb;
+ 
+ 	netdev_sent_queue(net_dev, skb->len);
+ 
+@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ 	/* Increase ring->end to point empty slot. We tell hardware the first
+ 	 * slot it should *not* read.
+ 	 */
+-	if (++ring->end >= BGMAC_TX_RING_SLOTS)
+-		ring->end = 0;
++	ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
+ 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+ 		    ring->index_base +
+ 		    ring->end * sizeof(struct bgmac_dma_desc));
+ 
+-	/* Always keep one slot free to allow detecting bugged calls. */
+-	if (--free_slots == 1)
++	free_slots -= nr_frags + 1;
++	if (free_slots < 8)
+ 		netif_stop_queue(net_dev);
+ 
+ 	return NETDEV_TX_OK;
+ 
+-err_stop_drop:
+-	netif_stop_queue(net_dev);
+err_dma:
+	/* slot is the fragment whose mapping failed; unmap the head instead */
+	slot = &ring->slots[ring->end % BGMAC_TX_RING_SLOTS];
+	dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+			 DMA_TO_DEVICE);
+	/* Fragments 0..i-1 were mapped into the i slots after the head */
+	for (; i > 0; i--) {
+		slot = &ring->slots[(ring->end + i) % BGMAC_TX_RING_SLOTS];
+		dma_unmap_page(dma_dev, slot->dma_addr,
+			       skb_frag_size(&skb_shinfo(skb)->frags[i - 1]),
+			       DMA_TO_DEVICE);
+	}
++
++err_dma_head:
++	bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
++		  ring->mmio_base);
++
++err_drop:
+ 	dev_kfree_skb(skb);
+ 	return NETDEV_TX_OK;
+ }
+@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
+ 
+ 	while (ring->start != empty_slot) {
+ 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
++		u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
++		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+ 
+-		if (slot->skb) {
++		if (!slot->dma_addr) {
++			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
++				  ring->start, ring->end);
++			goto next;
++		}
++
++		if (ctl1 & BGMAC_DESC_CTL0_SOF)
+ 			/* Unmap no longer used buffer */
+-			dma_unmap_single(dma_dev, slot->dma_addr,
+-					 slot->skb->len, DMA_TO_DEVICE);
+-			slot->dma_addr = 0;
++			dma_unmap_single(dma_dev, slot->dma_addr, len,
++					 DMA_TO_DEVICE);
++		else
++			dma_unmap_page(dma_dev, slot->dma_addr, len,
++				       DMA_TO_DEVICE);
+ 
++		if (slot->skb) {
+ 			bytes_compl += slot->skb->len;
+ 			pkts_compl++;
+ 
+ 			/* Free memory! :) */
+ 			dev_kfree_skb(slot->skb);
+ 			slot->skb = NULL;
+-		} else {
+-			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+-				  ring->start, ring->end);
+ 		}
+ 
++next:
++		slot->dma_addr = 0;
+ 		if (++ring->start >= BGMAC_TX_RING_SLOTS)
+ 			ring->start = 0;
+ 		freed = true;
+ 	}
+ 
++	if (!pkts_compl)
++		return;
++
+ 	netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
+ 
+-	if (freed && netif_queue_stopped(bgmac->net_dev))
++	if (netif_queue_stopped(bgmac->net_dev))
+ 		netif_wake_queue(bgmac->net_dev);
+ }
+ 
+@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
+ 				   struct bgmac_dma_ring *ring)
+ {
+ 	struct device *dma_dev = bgmac->core->dma_dev;
++	struct bgmac_dma_desc *dma_desc = ring->cpu_base;
+ 	struct bgmac_slot_info *slot;
+ 	int i;
+ 
+ 	for (i = 0; i < ring->num_slots; i++) {
+		int len = le32_to_cpu(dma_desc[i].ctl1) & BGMAC_DESC_CTL1_LEN;
++
+ 		slot = &ring->slots[i];
+-		if (slot->skb) {
+-			if (slot->dma_addr)
+-				dma_unmap_single(dma_dev, slot->dma_addr,
+-						 slot->skb->len, DMA_TO_DEVICE);
+-			dev_kfree_skb(slot->skb);
+-		}
++		dev_kfree_skb(slot->skb);
++
++		if (!slot->dma_addr)
++			continue;
++
++		if (slot->skb)
++			dma_unmap_single(dma_dev, slot->dma_addr,
++					 len, DMA_TO_DEVICE);
++		else
++			dma_unmap_page(dma_dev, slot->dma_addr,
++				       len, DMA_TO_DEVICE);
+ 	}
+ }
+ 
+@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_devic
+ 		goto err_dma_free;
+ 	}
+ 
++	net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
++	net_dev->hw_features = net_dev->features;
++	net_dev->vlan_features = net_dev->features;
++
+ 	err = register_netdev(bgmac->net_dev);
+ 	if (err) {
+ 		bgmac_err(bgmac, "Cannot register net device\n");
diff --git a/target/linux/generic/patches-4.0/072-04-bgmac-simplify-tx-ring-index-handling.patch b/target/linux/generic/patches-4.0/072-04-bgmac-simplify-tx-ring-index-handling.patch
new file mode 100644
index 0000000000000000000000000000000000000000..241a3083a95316af075b56f0e36506692acd0c2d
--- /dev/null
+++ b/target/linux/generic/patches-4.0/072-04-bgmac-simplify-tx-ring-index-handling.patch
@@ -0,0 +1,125 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Sun, 12 Apr 2015 09:58:56 +0200
+Subject: [PATCH] bgmac: simplify tx ring index handling
+
+Keep incrementing ring->start and ring->end instead of pointing them to
+the actual ring slot entry. This simplifies the calculation of the
+number of free slots.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -142,11 +142,10 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ {
+ 	struct device *dma_dev = bgmac->core->dma_dev;
+ 	struct net_device *net_dev = bgmac->net_dev;
+-	struct bgmac_slot_info *slot = &ring->slots[ring->end];
+-	int free_slots;
++	int index = ring->end % BGMAC_TX_RING_SLOTS;
++	struct bgmac_slot_info *slot = &ring->slots[index];
+ 	int nr_frags;
+ 	u32 flags;
+-	int index = ring->end;
+ 	int i;
+ 
+ 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
+@@ -159,12 +158,10 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ 
+ 	nr_frags = skb_shinfo(skb)->nr_frags;
+ 
+-	if (ring->start <= ring->end)
+-		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+-	else
+-		free_slots = ring->start - ring->end;
+-
+-	if (free_slots <= nr_frags + 1) {
++	/* ring->end - ring->start will return the number of valid slots,
++	 * even when ring->end overflows
++	 */
++	if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
+ 		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+ 		netif_stop_queue(net_dev);
+ 		return NETDEV_TX_BUSY;
+@@ -200,7 +197,7 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ 	}
+ 
+ 	slot->skb = skb;
+-
++	ring->end += nr_frags + 1;
+ 	netdev_sent_queue(net_dev, skb->len);
+ 
+ 	wmb();
+@@ -208,13 +205,12 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ 	/* Increase ring->end to point empty slot. We tell hardware the first
+ 	 * slot it should *not* read.
+ 	 */
+-	ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
+ 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+ 		    ring->index_base +
+-		    ring->end * sizeof(struct bgmac_dma_desc));
++		    (ring->end % BGMAC_TX_RING_SLOTS) *
++		    sizeof(struct bgmac_dma_desc));
+ 
+-	free_slots -= nr_frags + 1;
+-	if (free_slots < 8)
++	if (ring->end - ring->start >= BGMAC_TX_RING_SLOTS - 8)
+ 		netif_stop_queue(net_dev);
+ 
+ 	return NETDEV_TX_OK;
+@@ -256,17 +252,17 @@ static void bgmac_dma_tx_free(struct bgm
+ 	empty_slot &= BGMAC_DMA_TX_STATDPTR;
+ 	empty_slot /= sizeof(struct bgmac_dma_desc);
+ 
+-	while (ring->start != empty_slot) {
+-		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+-		u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+-		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
++	while (ring->start != ring->end) {
++		int slot_idx = ring->start % BGMAC_TX_RING_SLOTS;
++		struct bgmac_slot_info *slot = &ring->slots[slot_idx];
++		u32 ctl1;
++		int len;
+ 
+-		if (!slot->dma_addr) {
+-			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+-				  ring->start, ring->end);
+-			goto next;
+-		}
++		if (slot_idx == empty_slot)
++			break;
+ 
++		ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1);
++		len = ctl1 & BGMAC_DESC_CTL1_LEN;
+ 		if (ctl1 & BGMAC_DESC_CTL0_SOF)
+ 			/* Unmap no longer used buffer */
+ 			dma_unmap_single(dma_dev, slot->dma_addr, len,
+@@ -284,10 +280,8 @@ static void bgmac_dma_tx_free(struct bgm
+ 			slot->skb = NULL;
+ 		}
+ 
+-next:
+ 		slot->dma_addr = 0;
+-		if (++ring->start >= BGMAC_TX_RING_SLOTS)
+-			ring->start = 0;
++		ring->start++;
+ 		freed = true;
+ 	}
+ 
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -414,10 +414,10 @@ enum bgmac_dma_ring_type {
+  * empty.
+  */
+ struct bgmac_dma_ring {
+-	u16 num_slots;
+-	u16 start;
+-	u16 end;
++	u32 start;
++	u32 end;
+ 
++	u16 num_slots;
+ 	u16 mmio_base;
+ 	struct bgmac_dma_desc *cpu_base;
+ 	dma_addr_t dma_base;
diff --git a/target/linux/generic/patches-4.0/775-bgmac-check-length-of-received-frame.patch b/target/linux/generic/patches-4.0/775-bgmac-check-length-of-received-frame.patch
deleted file mode 100644
index 883c77b2f5265499426868b3b695aa29323bc8f4..0000000000000000000000000000000000000000
--- a/target/linux/generic/patches-4.0/775-bgmac-check-length-of-received-frame.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 2d12a9abf3f81de5b51852e3cfcba8cedac82642 Mon Sep 17 00:00:00 2001
-From: Hauke Mehrtens <hauke@hauke-m.de>
-Date: Fri, 6 Dec 2013 01:14:52 +0100
-Subject: [PATCH] bgmac: check length of received frame
-
----
- drivers/net/ethernet/broadcom/bgmac.c |    9 ++++++++-
- 1 file changed, 8 insertions(+), 1 deletion(-)
-
---- a/drivers/net/ethernet/broadcom/bgmac.c
-+++ b/drivers/net/ethernet/broadcom/bgmac.c
-@@ -361,6 +361,27 @@ static int bgmac_dma_rx_read(struct bgma
- 			dma_addr_t old_dma_addr = slot->dma_addr;
- 			int err;
- 
-+			if (len > BGMAC_RX_MAX_FRAME_SIZE) {
-+				struct bgmac_dma_desc *dma_desc = ring->cpu_base + ring->start;
-+
-+				bgmac_err(bgmac, "Hardware reported invalid packet length %d for slot %d!\n", len, ring->start);
-+				bgmac_err(bgmac, "flags: 0x%04X\n", flags);
-+				bgmac_err(bgmac, "ctl0: 0x%08X\tctl1: 0x%08X\n", le32_to_cpu(dma_desc->ctl0), le32_to_cpu(dma_desc->ctl1));
-+
-+				bgmac_err(bgmac, "   BGMAC_DMA_RX_CTL: 0x%08X\n", bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL));
-+				bgmac_err(bgmac, " BGMAC_DMA_RX_INDEX: 0x%08X\n", bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX));
-+				bgmac_err(bgmac, "BGMAC_DMA_RX_RINGLO: 0x%08X\n", bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO));
-+				bgmac_err(bgmac, "BGMAC_DMA_RX_RINGHI: 0x%08X\n", bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGHI));
-+				bgmac_err(bgmac, "BGMAC_DMA_RX_STATUS: 0x%08X\n", bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_STATUS));
-+				bgmac_err(bgmac, " BGMAC_DMA_RX_ERROR: 0x%08X\n", bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_ERROR));
-+
-+				dma_sync_single_for_device(dma_dev,
-+							   slot->dma_addr,
-+							   BGMAC_RX_BUF_SIZE,
-+							   DMA_FROM_DEVICE);
-+				break;
-+			}
-+
- 			/* Check for poison and drop or pass the packet */
- 			if (len == 0xdead && flags == 0xbeef) {
- 				bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n",