diff --git a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
index 881741660bbbcec7d4a5070f63bc35308094174a..b9d95adaf62fc6bcb3a05ad64ba73a04232a65fe 100644
--- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
+++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
@@ -53,6 +53,7 @@
 #define AG71XX_TX_MTU_LEN	1540
 #define AG71XX_RX_PKT_SIZE	\
 	(ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN)
+#define AG71XX_RX_BUF_SIZE (AG71XX_RX_PKT_SIZE + NET_SKB_PAD + NET_IP_ALIGN)
 
 #define AG71XX_TX_RING_SIZE_DEFAULT	64
 #define AG71XX_RX_RING_SIZE_DEFAULT	128
@@ -85,7 +86,10 @@ struct ag71xx_desc {
 } __attribute__((aligned(4)));
 
 struct ag71xx_buf {
-	struct sk_buff		*skb;
+	union {
+		struct sk_buff	*skb;
+		void		*rx_buf;
+	};
 	struct ag71xx_desc	*desc;
 	dma_addr_t		dma_addr;
 	unsigned long		timestamp;
diff --git a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
index 6d1aff7f7e367ae2bb7711099bb97e9eb633f03a..fb99d272816c7eaa169a5b9ca21cdebec1b92f49 100644
--- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
+++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c
@@ -189,15 +189,17 @@ static void ag71xx_ring_rx_clean(struct ag71xx *ag)
 		return;
 
 	for (i = 0; i < ring->size; i++)
-		if (ring->buf[i].skb) {
+		if (ring->buf[i].rx_buf) {
 			dma_unmap_single(&ag->dev->dev, ring->buf[i].dma_addr,
-					 AG71XX_RX_PKT_SIZE, DMA_FROM_DEVICE);
-			kfree_skb(ring->buf[i].skb);
+					 AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE);
+			kfree(ring->buf[i].rx_buf);
 		}
 }
 
-struct sk_buff *ag71xx_rx_alloc(struct ag71xx *ag)
+static int ag71xx_buffer_offset(struct ag71xx *ag)
 {
+	int offset = NET_SKB_PAD;
+
 	/*
 	 * On AR71xx/AR91xx packets must be 4-byte aligned.
 	 *
@@ -205,17 +207,55 @@ struct sk_buff *ag71xx_rx_alloc(struct ag71xx *ag)
 	 * so we don't need any extra alignment in that case.
 	 */
 	if (!ag71xx_get_pdata(ag)->is_ar724x || ag71xx_has_ar8216(ag))
-		return netdev_alloc_skb(ag->dev, AG71XX_RX_PKT_SIZE);
+		return offset;
 
-	return netdev_alloc_skb_ip_align(ag->dev, AG71XX_RX_PKT_SIZE);
+	return offset + NET_IP_ALIGN;
 }
 
+/*
+ * Allocate one RX buffer, map it for device-to-memory DMA and point the
+ * descriptor of @buf at it.
+ *
+ * The allocation is AG71XX_RX_BUF_SIZE plus room for a struct
+ * skb_shared_info, so the buffer can later be handed to build_skb().
+ * @offset is the headroom, as computed by ag71xx_buffer_offset(), added
+ * to the DMA address programmed into the descriptor.
+ *
+ * Returns true on success. On allocation or mapping failure returns
+ * false and leaves @buf untouched.
+ */
+static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf,
+			       int offset)
+{
+	dma_addr_t dma_addr;
+	void *data;
+
+	data = kmalloc(AG71XX_RX_BUF_SIZE +
+		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+		       GFP_ATOMIC);
+	if (!data)
+		return false;
+
+	dma_addr = dma_map_single(&ag->dev->dev, data,
+				  AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(&ag->dev->dev, dma_addr)) {
+		/* Never hand an invalid bus address to the hardware. */
+		kfree(data);
+		return false;
+	}
+
+	buf->rx_buf = data;
+	buf->dma_addr = dma_addr;
+	buf->desc->data = (u32) dma_addr + offset;
+	return true;
+}
 
 static int ag71xx_ring_rx_init(struct ag71xx *ag)
 {
 	struct ag71xx_ring *ring = &ag->rx_ring;
 	unsigned int i;
 	int ret;
+	int offset = ag71xx_buffer_offset(ag);
 
 	ret = 0;
 	for (i = 0; i < ring->size; i++) {
@@ -228,22 +248,11 @@ static int ag71xx_ring_rx_init(struct ag71xx *ag)
 	}
 
 	for (i = 0; i < ring->size; i++) {
-		struct sk_buff *skb;
-		dma_addr_t dma_addr;
-
-		skb = ag71xx_rx_alloc(ag);
-		if (!skb) {
+		if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], offset)) {
 			ret = -ENOMEM;
 			break;
 		}
 
-		skb->dev = ag->dev;
-		dma_addr = dma_map_single(&ag->dev->dev, skb->data,
-					  AG71XX_RX_PKT_SIZE,
-					  DMA_FROM_DEVICE);
-		ring->buf[i].skb = skb;
-		ring->buf[i].dma_addr = dma_addr;
-		ring->buf[i].desc->data = (u32) dma_addr;
 		ring->buf[i].desc->ctrl = DESC_EMPTY;
 	}
 
@@ -260,6 +269,7 @@ static int ag71xx_ring_rx_refill(struct ag71xx *ag)
 {
 	struct ag71xx_ring *ring = &ag->rx_ring;
 	unsigned int count;
+	int offset = ag71xx_buffer_offset(ag);
 
 	count = 0;
 	for (; ring->curr - ring->dirty > 0; ring->dirty++) {
@@ -267,24 +277,9 @@ static int ag71xx_ring_rx_refill(struct ag71xx *ag)
 
 		i = ring->dirty % ring->size;
 
-		if (ring->buf[i].skb == NULL) {
-			dma_addr_t dma_addr;
-			struct sk_buff *skb;
-
-			skb = ag71xx_rx_alloc(ag);
-			if (skb == NULL)
-				break;
-
-			skb->dev = ag->dev;
-
-			dma_addr = dma_map_single(&ag->dev->dev, skb->data,
-						  AG71XX_RX_PKT_SIZE,
-						  DMA_FROM_DEVICE);
-
-			ring->buf[i].skb = skb;
-			ring->buf[i].dma_addr = dma_addr;
-			ring->buf[i].desc->data = (u32) dma_addr;
-		}
+		if (!ring->buf[i].rx_buf &&
+		    !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset))
+			break;
 
 		ring->buf[i].desc->ctrl = DESC_EMPTY;
 		count++;
@@ -863,6 +858,7 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
 {
 	struct net_device *dev = ag->dev;
 	struct ag71xx_ring *ring = &ag->rx_ring;
+	int offset = ag71xx_buffer_offset(ag);
 	int done = 0;
 
 	DBG("%s: rx packets, limit=%d, curr=%u, dirty=%u\n",
@@ -885,18 +881,25 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
 
 		ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR);
 
-		skb = ring->buf[i].skb;
 		pktlen = ag71xx_desc_pktlen(desc);
 		pktlen -= ETH_FCS_LEN;
 
 		dma_unmap_single(&dev->dev, ring->buf[i].dma_addr,
-				 AG71XX_RX_PKT_SIZE, DMA_FROM_DEVICE);
+				 AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE);
 
 		dev->last_rx = jiffies;
 		dev->stats.rx_packets++;
 		dev->stats.rx_bytes += pktlen;
 
+		skb = build_skb(ring->buf[i].rx_buf);
+		if (!skb) {
+			kfree(ring->buf[i].rx_buf);
+			goto next;
+		}
+
+		skb_reserve(skb, offset);
 		skb_put(skb, pktlen);
+
 		if (ag71xx_has_ar8216(ag))
 			err = ag71xx_remove_ar8216_header(ag, skb, pktlen);
 
@@ -910,7 +913,8 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
 			netif_receive_skb(skb);
 		}
 
-		ring->buf[i].skb = NULL;
+next:
+		ring->buf[i].rx_buf = NULL;
 		done++;
 
 		ring->curr++;
@@ -944,7 +948,7 @@ static int ag71xx_poll(struct napi_struct *napi, int limit)
 	ag71xx_debugfs_update_napi_stats(ag, rx_done, tx_done);
 
 	rx_ring = &ag->rx_ring;
-	if (rx_ring->buf[rx_ring->dirty % rx_ring->size].skb == NULL)
+	if (rx_ring->buf[rx_ring->dirty % rx_ring->size].rx_buf == NULL)
 		goto oom;
 
 	status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS);