OpenWrt – Rev 4

Subversion Repositories:
Rev:
--- a/Documentation/devicetree/bindings/net/stmmac.txt
+++ b/Documentation/devicetree/bindings/net/stmmac.txt
@@ -7,9 +7,12 @@ Required properties:
 - interrupt-parent: Should be the phandle for the interrupt controller
   that services interrupts for this device
 - interrupts: Should contain the STMMAC interrupts
-- interrupt-names: Should contain the interrupt names "macirq"
-  "eth_wake_irq" if this interrupt is supported in the "interrupts"
-  property
+- interrupt-names: Should contain a list of interrupt names corresponding to
+       the interrupts in the interrupts property, if available.
+       Valid interrupt names are:
+  - "macirq" (combined signal for various interrupt events)
+  - "eth_wake_irq" (the interrupt to manage the remote wake-up packet detection)
+  - "eth_lpi" (the interrupt that occurs when Tx or Rx enters/exits LPI state)
 - phy-mode: See ethernet.txt file in the same directory.
 - snps,reset-gpio      gpio number for phy reset.
 - snps,reset-active-low boolean flag to indicate if phy reset is active low.
@@ -28,9 +31,9 @@ Optional properties:
   clocks may be specified in derived bindings.
 - clock-names: One name for each entry in the clocks property, the
   first one should be "stmmaceth" and the second one should be "pclk".
-- clk_ptp_ref: this is the PTP reference clock; in case of the PTP is
-  available this clock is used for programming the Timestamp Addend Register.
-  If not passed then the system clock will be used and this is fine on some
+- ptp_ref: this is the PTP reference clock; if PTP is available, this clock
+  is used for programming the Timestamp Addend Register. If it is not passed,
+  the system clock will be used instead, which is fine on some
   platforms.
 - tx-fifo-depth: See ethernet.txt file in the same directory
 - rx-fifo-depth: See ethernet.txt file in the same directory
@@ -72,7 +75,45 @@ Optional properties:
        - snps,mb: mixed-burst
        - snps,rb: rebuild INCRx Burst
 - mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus.
-
+- Multiple RX Queues parameters: below is the list of all the parameters to
+                                configure the multiple RX queues:
+       - snps,rx-queues-to-use: number of RX queues to be used in the driver
+       - Choose one of these RX scheduling algorithms:
+               - snps,rx-sched-sp: Strict priority
+               - snps,rx-sched-wsp: Weighted Strict priority
+       - For each RX queue
+               - Choose one of these modes:
+                       - snps,dcb-algorithm: Queue to be enabled as DCB
+                       - snps,avb-algorithm: Queue to be enabled as AVB
+               - snps,map-to-dma-channel: Channel to map
+               - Specify specific packet routing:
+                       - snps,route-avcp: AV Untagged Control packets
+                       - snps,route-ptp: PTP Packets
+                       - snps,route-dcbcp: DCB Control Packets
+                       - snps,route-up: Untagged Packets
+                       - snps,route-multi-broad: Multicast & Broadcast Packets
+               - snps,priority: RX queue priority (Range: 0x0 to 0xF)
+- Multiple TX Queues parameters: below is the list of all the parameters to
+                                configure the multiple TX queues:
+       - snps,tx-queues-to-use: number of TX queues to be used in the driver
+       - Choose one of these TX scheduling algorithms:
+               - snps,tx-sched-wrr: Weighted Round Robin
+               - snps,tx-sched-wfq: Weighted Fair Queuing
+               - snps,tx-sched-dwrr: Deficit Weighted Round Robin
+               - snps,tx-sched-sp: Strict priority
+       - For each TX queue
+               - snps,weight: TX queue weight (if using a DCB weight algorithm)
+               - Choose one of these modes:
+                       - snps,dcb-algorithm: TX queue will be working in DCB
+                       - snps,avb-algorithm: TX queue will be working in AVB
+                         [Attention] Queue 0 is reserved for legacy traffic
+                         and so no AVB is available in this queue.
+               - Configure Credit Based Shaper (if AVB Mode selected):
+                       - snps,send_slope: send slope credit value of the shaper
+                       - snps,idle_slope: idle slope credit value of the shaper
+                       - snps,high_credit: maximum (high) credit limit
+                       - snps,low_credit: minimum (low) credit limit
+               - snps,priority: TX queue priority (Range: 0x0 to 0xF)
 Examples:
 
        stmmac_axi_setup: stmmac-axi-config {
@@ -81,12 +122,41 @@ Examples:
                snps,blen = <256 128 64 32 0 0 0>;
        };
 
+       mtl_rx_setup: rx-queues-config {
+               snps,rx-queues-to-use = <1>;
+               snps,rx-sched-sp;
+               queue0 {
+                       snps,dcb-algorithm;
+                       snps,map-to-dma-channel = <0x0>;
+                       snps,priority = <0x0>;
+               };
+       };
+
+       mtl_tx_setup: tx-queues-config {
+               snps,tx-queues-to-use = <2>;
+               snps,tx-sched-wrr;
+               queue0 {
+                       snps,weight = <0x10>;
+                       snps,dcb-algorithm;
+                       snps,priority = <0x0>;
+               };
+
+               queue1 {
+                       snps,avb-algorithm;
+                       snps,send_slope = <0x1000>;
+                       snps,idle_slope = <0x1000>;
+                       snps,high_credit = <0x3E800>;
+                       snps,low_credit = <0xFFC18000>;
+                       snps,priority = <0x1>;
+               };
+       };
+
        gmac0: ethernet@e0800000 {
                compatible = "st,spear600-gmac";
                reg = <0xe0800000 0x8000>;
                interrupt-parent = <&vic1>;
-               interrupts = <24 23>;
-               interrupt-names = "macirq", "eth_wake_irq";
+               interrupts = <24 23 22>;
+               interrupt-names = "macirq", "eth_wake_irq", "eth_lpi";
                mac-address = [000000000000]; /* Filled in by U-Boot */
                max-frame-size = <3800>;
                phy-mode = "gmii";
@@ -104,4 +174,6 @@ Examples:
                        phy1: ethernet-phy@0 {
                        };
                };
+               snps,mtl-rx-config = <&mtl_rx_setup>;
+               snps,mtl-tx-config = <&mtl_tx_setup>;
        };
--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
@@ -37,6 +37,7 @@
 #define TSE_PCS_CONTROL_AN_EN_MASK                     BIT(12)
 #define TSE_PCS_CONTROL_REG                            0x00
 #define TSE_PCS_CONTROL_RESTART_AN_MASK                        BIT(9)
+#define TSE_PCS_CTRL_AUTONEG_SGMII                     0x1140
 #define TSE_PCS_IF_MODE_REG                            0x28
 #define TSE_PCS_LINK_TIMER_0_REG                       0x24
 #define TSE_PCS_LINK_TIMER_1_REG                       0x26
@@ -65,6 +66,7 @@
 #define TSE_PCS_SW_RESET_TIMEOUT                       100
 #define TSE_PCS_USE_SGMII_AN_MASK                      BIT(1)
 #define TSE_PCS_USE_SGMII_ENA                          BIT(0)
+#define TSE_PCS_IF_USE_SGMII                           0x03
 
 #define SGMII_ADAPTER_CTRL_REG                         0x00
 #define SGMII_ADAPTER_DISABLE                          0x0001
@@ -101,7 +103,9 @@ int tse_pcs_init(void __iomem *base, str
 {
        int ret = 0;
 
-       writew(TSE_PCS_USE_SGMII_ENA, base + TSE_PCS_IF_MODE_REG);
+       writew(TSE_PCS_IF_USE_SGMII, base + TSE_PCS_IF_MODE_REG);
+
+       writew(TSE_PCS_CTRL_AUTONEG_SGMII, base + TSE_PCS_CONTROL_REG);
 
        writew(TSE_PCS_SGMII_LINK_TIMER_0, base + TSE_PCS_LINK_TIMER_0_REG);
        writew(TSE_PCS_SGMII_LINK_TIMER_1, base + TSE_PCS_LINK_TIMER_1_REG);
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -26,12 +26,15 @@
 
 static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 {
-       struct stmmac_priv *priv = (struct stmmac_priv *)p;
-       unsigned int entry = priv->cur_tx;
-       struct dma_desc *desc = priv->dma_tx + entry;
+       struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p;
        unsigned int nopaged_len = skb_headlen(skb);
+       struct stmmac_priv *priv = tx_q->priv_data;
+       unsigned int entry = tx_q->cur_tx;
        unsigned int bmax, des2;
        unsigned int i = 1, len;
+       struct dma_desc *desc;
+
+       desc = tx_q->dma_tx + entry;
 
        if (priv->plat->enh_desc)
                bmax = BUF_SIZE_8KiB;
@@ -45,16 +48,16 @@ static int stmmac_jumbo_frm(void *p, str
        desc->des2 = cpu_to_le32(des2);
        if (dma_mapping_error(priv->device, des2))
                return -1;
-       priv->tx_skbuff_dma[entry].buf = des2;
-       priv->tx_skbuff_dma[entry].len = bmax;
+       tx_q->tx_skbuff_dma[entry].buf = des2;
+       tx_q->tx_skbuff_dma[entry].len = bmax;
        /* do not close the descriptor and do not set own bit */
        priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, STMMAC_CHAIN_MODE,
-                                       0, false);
+                                       0, false, skb->len);
 
        while (len != 0) {
-               priv->tx_skbuff[entry] = NULL;
+               tx_q->tx_skbuff[entry] = NULL;
                entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
-               desc = priv->dma_tx + entry;
+               desc = tx_q->dma_tx + entry;
 
                if (len > bmax) {
                        des2 = dma_map_single(priv->device,
@@ -63,11 +66,11 @@ static int stmmac_jumbo_frm(void *p, str
                        desc->des2 = cpu_to_le32(des2);
                        if (dma_mapping_error(priv->device, des2))
                                return -1;
-                       priv->tx_skbuff_dma[entry].buf = des2;
-                       priv->tx_skbuff_dma[entry].len = bmax;
+                       tx_q->tx_skbuff_dma[entry].buf = des2;
+                       tx_q->tx_skbuff_dma[entry].len = bmax;
                        priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum,
                                                        STMMAC_CHAIN_MODE, 1,
-                                                       false);
+                                                       false, skb->len);
                        len -= bmax;
                        i++;
                } else {
@@ -77,17 +80,17 @@ static int stmmac_jumbo_frm(void *p, str
                        desc->des2 = cpu_to_le32(des2);
                        if (dma_mapping_error(priv->device, des2))
                                return -1;
-                       priv->tx_skbuff_dma[entry].buf = des2;
-                       priv->tx_skbuff_dma[entry].len = len;
+                       tx_q->tx_skbuff_dma[entry].buf = des2;
+                       tx_q->tx_skbuff_dma[entry].len = len;
                        /* last descriptor can be set now */
                        priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
                                                        STMMAC_CHAIN_MODE, 1,
-                                                       true);
+                                                       true, skb->len);
                        len = 0;
                }
        }
 
-       priv->cur_tx = entry;
+       tx_q->cur_tx = entry;
 
        return entry;
 }
@@ -136,32 +139,34 @@ static void stmmac_init_dma_chain(void *
 
 static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
 {
-       struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr;
+       struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)priv_ptr;
+       struct stmmac_priv *priv = rx_q->priv_data;
 
        if (priv->hwts_rx_en && !priv->extend_desc)
                /* NOTE: Device will overwrite des3 with timestamp value if
                 * 1588-2002 time stamping is enabled, hence reinitialize it
                 * to keep explicit chaining in the descriptor.
                 */
-               p->des3 = cpu_to_le32((unsigned int)(priv->dma_rx_phy +
-                                     (((priv->dirty_rx) + 1) %
+               p->des3 = cpu_to_le32((unsigned int)(rx_q->dma_rx_phy +
+                                     (((rx_q->dirty_rx) + 1) %
                                       DMA_RX_SIZE) *
                                      sizeof(struct dma_desc)));
 }
 
 static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
 {
-       struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr;
-       unsigned int entry = priv->dirty_tx;
+       struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr;
+       struct stmmac_priv *priv = tx_q->priv_data;
+       unsigned int entry = tx_q->dirty_tx;
 
-       if (priv->tx_skbuff_dma[entry].last_segment && !priv->extend_desc &&
+       if (tx_q->tx_skbuff_dma[entry].last_segment && !priv->extend_desc &&
            priv->hwts_tx_en)
                /* NOTE: Device will overwrite des3 with timestamp value if
                 * 1588-2002 time stamping is enabled, hence reinitialize it
                 * to keep explicit chaining in the descriptor.
                 */
-               p->des3 = cpu_to_le32((unsigned int)((priv->dma_tx_phy +
-                                     ((priv->dirty_tx + 1) % DMA_TX_SIZE))
+               p->des3 = cpu_to_le32((unsigned int)((tx_q->dma_tx_phy +
+                                     ((tx_q->dirty_tx + 1) % DMA_TX_SIZE))
                                      * sizeof(struct dma_desc)));
 }
 
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -246,6 +246,15 @@ struct stmmac_extra_stats {
 #define STMMAC_TX_MAX_FRAMES   256
 #define STMMAC_TX_FRAMES       64
 
+/* Packets types */
+enum packets_types {
+       PACKET_AVCPQ = 0x1, /* AV Untagged Control packets */
+       PACKET_PTPQ = 0x2, /* PTP Packets */
+       PACKET_DCBCPQ = 0x3, /* DCB Control Packets */
+       PACKET_UPQ = 0x4, /* Untagged Packets */
+       PACKET_MCBCQ = 0x5, /* Multicast & Broadcast Packets */
+};
+
 /* Rx IPC status */
 enum rx_frame_status {
        good_frame = 0x0,
@@ -324,6 +333,9 @@ struct dma_features {
        unsigned int number_tx_queues;
        /* Alternate (enhanced) DESC mode */
        unsigned int enh_desc;
+       /* TX and RX FIFO sizes */
+       unsigned int tx_fifo_size;
+       unsigned int rx_fifo_size;
 };
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -361,7 +373,7 @@ struct stmmac_desc_ops {
        /* Invoked by the xmit function to prepare the tx descriptor */
        void (*prepare_tx_desc) (struct dma_desc *p, int is_fs, int len,
                                 bool csum_flag, int mode, bool tx_own,
-                                bool ls);
+                                bool ls, unsigned int tot_pkt_len);
        void (*prepare_tso_tx_desc)(struct dma_desc *p, int is_fs, int len1,
                                    int len2, bool tx_own, bool ls,
                                    unsigned int tcphdrlen,
@@ -413,6 +425,14 @@ struct stmmac_dma_ops {
        int (*reset)(void __iomem *ioaddr);
        void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
                     u32 dma_tx, u32 dma_rx, int atds);
+       void (*init_chan)(void __iomem *ioaddr,
+                         struct stmmac_dma_cfg *dma_cfg, u32 chan);
+       void (*init_rx_chan)(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg,
+                            u32 dma_rx_phy, u32 chan);
+       void (*init_tx_chan)(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg,
+                            u32 dma_tx_phy, u32 chan);
        /* Configure the AXI Bus Mode Register */
        void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
        /* Dump DMA registers */
@@ -421,25 +441,28 @@ struct stmmac_dma_ops {
         * An invalid value enables the store-and-forward mode */
        void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode,
                         int rxfifosz);
+       void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
+                           int fifosz);
+       void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel);
        /* To track extra statistic (if supported) */
        void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
                                   void __iomem *ioaddr);
        void (*enable_dma_transmission) (void __iomem *ioaddr);
-       void (*enable_dma_irq) (void __iomem *ioaddr);
-       void (*disable_dma_irq) (void __iomem *ioaddr);
-       void (*start_tx) (void __iomem *ioaddr);
-       void (*stop_tx) (void __iomem *ioaddr);
-       void (*start_rx) (void __iomem *ioaddr);
-       void (*stop_rx) (void __iomem *ioaddr);
+       void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan);
+       void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan);
+       void (*start_tx)(void __iomem *ioaddr, u32 chan);
+       void (*stop_tx)(void __iomem *ioaddr, u32 chan);
+       void (*start_rx)(void __iomem *ioaddr, u32 chan);
+       void (*stop_rx)(void __iomem *ioaddr, u32 chan);
        int (*dma_interrupt) (void __iomem *ioaddr,
-                             struct stmmac_extra_stats *x);
+                             struct stmmac_extra_stats *x, u32 chan);
        /* If supported then get the optional core features */
        void (*get_hw_feature)(void __iomem *ioaddr,
                               struct dma_features *dma_cap);
        /* Program the HW RX Watchdog */
-       void (*rx_watchdog) (void __iomem *ioaddr, u32 riwt);
-       void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len);
-       void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len);
+       void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan);
+       void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
+       void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
        void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
        void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
        void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
@@ -451,20 +474,44 @@ struct mac_device_info;
 struct stmmac_ops {
        /* MAC core initialization */
        void (*core_init)(struct mac_device_info *hw, int mtu);
+       /* Enable the MAC RX/TX */
+       void (*set_mac)(void __iomem *ioaddr, bool enable);
        /* Enable and verify that the IPC module is supported */
        int (*rx_ipc)(struct mac_device_info *hw);
        /* Enable RX Queues */
-       void (*rx_queue_enable)(struct mac_device_info *hw, u32 queue);
+       void (*rx_queue_enable)(struct mac_device_info *hw, u8 mode, u32 queue);
+       /* RX Queues Priority */
+       void (*rx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
+       /* TX Queues Priority */
+       void (*tx_queue_prio)(struct mac_device_info *hw, u32 prio, u32 queue);
+       /* RX Queues Routing */
+       void (*rx_queue_routing)(struct mac_device_info *hw, u8 packet,
+                                u32 queue);
+       /* Program RX Algorithms */
+       void (*prog_mtl_rx_algorithms)(struct mac_device_info *hw, u32 rx_alg);
+       /* Program TX Algorithms */
+       void (*prog_mtl_tx_algorithms)(struct mac_device_info *hw, u32 tx_alg);
+       /* Set MTL TX queues weight */
+       void (*set_mtl_tx_queue_weight)(struct mac_device_info *hw,
+                                       u32 weight, u32 queue);
+       /* RX MTL queue to RX dma mapping */
+       void (*map_mtl_to_dma)(struct mac_device_info *hw, u32 queue, u32 chan);
+       /* Configure AV Algorithm */
+       void (*config_cbs)(struct mac_device_info *hw, u32 send_slope,
+                          u32 idle_slope, u32 high_credit, u32 low_credit,
+                          u32 queue);
        /* Dump MAC registers */
        void (*dump_regs)(struct mac_device_info *hw, u32 *reg_space);
        /* Handle extra events on specific interrupts hw dependent */
        int (*host_irq_status)(struct mac_device_info *hw,
                               struct stmmac_extra_stats *x);
+       /* Handle MTL interrupts */
+       int (*host_mtl_irq_status)(struct mac_device_info *hw, u32 chan);
        /* Multicast filter setting */
        void (*set_filter)(struct mac_device_info *hw, struct net_device *dev);
        /* Flow control setting */
        void (*flow_ctrl)(struct mac_device_info *hw, unsigned int duplex,
-                         unsigned int fc, unsigned int pause_time);
+                         unsigned int fc, unsigned int pause_time, u32 tx_cnt);
        /* Set power management mode (e.g. magic frame) */
        void (*pmt)(struct mac_device_info *hw, unsigned long mode);
        /* Set/Get Unicast MAC addresses */
@@ -477,7 +524,8 @@ struct stmmac_ops {
        void (*reset_eee_mode)(struct mac_device_info *hw);
        void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw);
        void (*set_eee_pls)(struct mac_device_info *hw, int link);
-       void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x);
+       void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+                     u32 rx_queues, u32 tx_queues);
        /* PCS calls */
        void (*pcs_ctrl_ane)(void __iomem *ioaddr, bool ane, bool srgmi_ral,
                             bool loopback);
@@ -547,6 +595,11 @@ struct mac_device_info {
        unsigned int ps;
 };
 
+struct stmmac_rx_routing {
+       u32 reg_mask;
+       u32 reg_shift;
+};
+
 struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
                                        int perfect_uc_entries,
                                        int *synopsys_id);
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -14,16 +14,34 @@
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/device.h>
+#include <linux/gpio/consumer.h>
 #include <linux/ethtool.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/of_net.h>
 #include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
+#include <linux/reset.h>
 #include <linux/stmmac.h>
 
 #include "stmmac_platform.h"
+#include "dwmac4.h"
+
+struct tegra_eqos {
+       struct device *dev;
+       void __iomem *regs;
+
+       struct reset_control *rst;
+       struct clk *clk_master;
+       struct clk *clk_slave;
+       struct clk *clk_tx;
+       struct clk *clk_rx;
+
+       struct gpio_desc *reset;
+};
 
 static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
                                   struct plat_stmmacenet_data *plat_dat)
@@ -106,13 +124,309 @@ static int dwc_eth_dwmac_config_dt(struc
        return 0;
 }
 
+static void *dwc_qos_probe(struct platform_device *pdev,
+                          struct plat_stmmacenet_data *plat_dat,
+                          struct stmmac_resources *stmmac_res)
+{
+       int err;
+
+       plat_dat->stmmac_clk = devm_clk_get(&pdev->dev, "apb_pclk");
+       if (IS_ERR(plat_dat->stmmac_clk)) {
+               dev_err(&pdev->dev, "apb_pclk clock not found.\n");
+               return ERR_CAST(plat_dat->stmmac_clk);
+       }
+
+       err = clk_prepare_enable(plat_dat->stmmac_clk);
+       if (err < 0) {
+               dev_err(&pdev->dev, "failed to enable apb_pclk clock: %d\n",
+                       err);
+               return ERR_PTR(err);
+       }
+
+       plat_dat->pclk = devm_clk_get(&pdev->dev, "phy_ref_clk");
+       if (IS_ERR(plat_dat->pclk)) {
+               dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
+               err = PTR_ERR(plat_dat->pclk);
+               goto disable;
+       }
+
+       err = clk_prepare_enable(plat_dat->pclk);
+       if (err < 0) {
+               dev_err(&pdev->dev, "failed to enable phy_ref clock: %d\n",
+                       err);
+               goto disable;
+       }
+
+       return NULL;
+
+disable:
+       clk_disable_unprepare(plat_dat->stmmac_clk);
+       return ERR_PTR(err);
+}
+
+static int dwc_qos_remove(struct platform_device *pdev)
+{
+       struct net_device *ndev = platform_get_drvdata(pdev);
+       struct stmmac_priv *priv = netdev_priv(ndev);
+
+       clk_disable_unprepare(priv->plat->pclk);
+       clk_disable_unprepare(priv->plat->stmmac_clk);
+
+       return 0;
+}
+
+#define SDMEMCOMPPADCTRL 0x8800
+#define  SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD BIT(31)
+
+#define AUTO_CAL_CONFIG 0x8804
+#define  AUTO_CAL_CONFIG_START BIT(31)
+#define  AUTO_CAL_CONFIG_ENABLE BIT(29)
+
+#define AUTO_CAL_STATUS 0x880c
+#define  AUTO_CAL_STATUS_ACTIVE BIT(31)
+
+static void tegra_eqos_fix_speed(void *priv, unsigned int speed)
+{
+       struct tegra_eqos *eqos = priv;
+       unsigned long rate = 125000000;
+       bool needs_calibration = false;
+       u32 value;
+       int err;
+
+       switch (speed) {
+       case SPEED_1000:
+               needs_calibration = true;
+               rate = 125000000;
+               break;
+
+       case SPEED_100:
+               needs_calibration = true;
+               rate = 25000000;
+               break;
+
+       case SPEED_10:
+               rate = 2500000;
+               break;
+
+       default:
+               dev_err(eqos->dev, "invalid speed %u\n", speed);
+               break;
+       }
+
+       if (needs_calibration) {
+               /* calibrate */
+               value = readl(eqos->regs + SDMEMCOMPPADCTRL);
+               value |= SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD;
+               writel(value, eqos->regs + SDMEMCOMPPADCTRL);
+
+               udelay(1);
+
+               value = readl(eqos->regs + AUTO_CAL_CONFIG);
+               value |= AUTO_CAL_CONFIG_START | AUTO_CAL_CONFIG_ENABLE;
+               writel(value, eqos->regs + AUTO_CAL_CONFIG);
+
+               err = readl_poll_timeout_atomic(eqos->regs + AUTO_CAL_STATUS,
+                                               value,
+                                               value & AUTO_CAL_STATUS_ACTIVE,
+                                               1, 10);
+               if (err < 0) {
+                       dev_err(eqos->dev, "calibration did not start\n");
+                       goto failed;
+               }
+
+               err = readl_poll_timeout_atomic(eqos->regs + AUTO_CAL_STATUS,
+                                               value,
+                                               (value & AUTO_CAL_STATUS_ACTIVE) == 0,
+                                               20, 200);
+               if (err < 0) {
+                       dev_err(eqos->dev, "calibration didn't finish\n");
+                       goto failed;
+               }
+
+       failed:
+               value = readl(eqos->regs + SDMEMCOMPPADCTRL);
+               value &= ~SDMEMCOMPPADCTRL_PAD_E_INPUT_OR_E_PWRD;
+               writel(value, eqos->regs + SDMEMCOMPPADCTRL);
+       } else {
+               value = readl(eqos->regs + AUTO_CAL_CONFIG);
+               value &= ~AUTO_CAL_CONFIG_ENABLE;
+               writel(value, eqos->regs + AUTO_CAL_CONFIG);
+       }
+
+       err = clk_set_rate(eqos->clk_tx, rate);
+       if (err < 0)
+               dev_err(eqos->dev, "failed to set TX rate: %d\n", err);
+}
+
+static int tegra_eqos_init(struct platform_device *pdev, void *priv)
+{
+       struct tegra_eqos *eqos = priv;
+       unsigned long rate;
+       u32 value;
+
+       rate = clk_get_rate(eqos->clk_slave);
+
+       value = (rate / 1000000) - 1;
+       writel(value, eqos->regs + GMAC_1US_TIC_COUNTER);
+
+       return 0;
+}
+
+static void *tegra_eqos_probe(struct platform_device *pdev,
+                             struct plat_stmmacenet_data *data,
+                             struct stmmac_resources *res)
+{
+       struct tegra_eqos *eqos;
+       int err;
+
+       eqos = devm_kzalloc(&pdev->dev, sizeof(*eqos), GFP_KERNEL);
+       if (!eqos) {
+               err = -ENOMEM;
+               goto error;
+       }
+
+       eqos->dev = &pdev->dev;
+       eqos->regs = res->addr;
+
+       eqos->clk_master = devm_clk_get(&pdev->dev, "master_bus");
+       if (IS_ERR(eqos->clk_master)) {
+               err = PTR_ERR(eqos->clk_master);
+               goto error;
+       }
+
+       err = clk_prepare_enable(eqos->clk_master);
+       if (err < 0)
+               goto error;
+
+       eqos->clk_slave = devm_clk_get(&pdev->dev, "slave_bus");
+       if (IS_ERR(eqos->clk_slave)) {
+               err = PTR_ERR(eqos->clk_slave);
+               goto disable_master;
+       }
+
+       data->stmmac_clk = eqos->clk_slave;
+
+       err = clk_prepare_enable(eqos->clk_slave);
+       if (err < 0)
+               goto disable_master;
+
+       eqos->clk_rx = devm_clk_get(&pdev->dev, "rx");
+       if (IS_ERR(eqos->clk_rx)) {
+               err = PTR_ERR(eqos->clk_rx);
+               goto disable_slave;
+       }
+
+       err = clk_prepare_enable(eqos->clk_rx);
+       if (err < 0)
+               goto disable_slave;
+
+       eqos->clk_tx = devm_clk_get(&pdev->dev, "tx");
+       if (IS_ERR(eqos->clk_tx)) {
+               err = PTR_ERR(eqos->clk_tx);
+               goto disable_rx;
+       }
+
+       err = clk_prepare_enable(eqos->clk_tx);
+       if (err < 0)
+               goto disable_rx;
+
+       eqos->reset = devm_gpiod_get(&pdev->dev, "phy-reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(eqos->reset)) {
+               err = PTR_ERR(eqos->reset);
+               goto disable_tx;
+       }
+
+       usleep_range(2000, 4000);
+       gpiod_set_value(eqos->reset, 0);
+
+       eqos->rst = devm_reset_control_get(&pdev->dev, "eqos");
+       if (IS_ERR(eqos->rst)) {
+               err = PTR_ERR(eqos->rst);
+               goto reset_phy;
+       }
+
+       err = reset_control_assert(eqos->rst);
+       if (err < 0)
+               goto reset_phy;
+
+       usleep_range(2000, 4000);
+
+       err = reset_control_deassert(eqos->rst);
+       if (err < 0)
+               goto reset_phy;
+
+       usleep_range(2000, 4000);
+
+       data->fix_mac_speed = tegra_eqos_fix_speed;
+       data->init = tegra_eqos_init;
+       data->bsp_priv = eqos;
+
+       err = tegra_eqos_init(pdev, eqos);
+       if (err < 0)
+               goto reset;
+
+out:
+       return eqos;
+
+reset:
+       reset_control_assert(eqos->rst);
+reset_phy:
+       gpiod_set_value(eqos->reset, 1);
+disable_tx:
+       clk_disable_unprepare(eqos->clk_tx);
+disable_rx:
+       clk_disable_unprepare(eqos->clk_rx);
+disable_slave:
+       clk_disable_unprepare(eqos->clk_slave);
+disable_master:
+       clk_disable_unprepare(eqos->clk_master);
+error:
+       eqos = ERR_PTR(err);
+       goto out;
+}
+
+static int tegra_eqos_remove(struct platform_device *pdev)
+{
+       struct tegra_eqos *eqos = get_stmmac_bsp_priv(&pdev->dev);
+
+       reset_control_assert(eqos->rst);
+       gpiod_set_value(eqos->reset, 1);
+       clk_disable_unprepare(eqos->clk_tx);
+       clk_disable_unprepare(eqos->clk_rx);
+       clk_disable_unprepare(eqos->clk_slave);
+       clk_disable_unprepare(eqos->clk_master);
+
+       return 0;
+}
+
+struct dwc_eth_dwmac_data {
+       void *(*probe)(struct platform_device *pdev,
+                      struct plat_stmmacenet_data *data,
+                      struct stmmac_resources *res);
+       int (*remove)(struct platform_device *pdev);
+};
+
+static const struct dwc_eth_dwmac_data dwc_qos_data = {
+       .probe = dwc_qos_probe,
+       .remove = dwc_qos_remove,
+};
+
+static const struct dwc_eth_dwmac_data tegra_eqos_data = {
+       .probe = tegra_eqos_probe,
+       .remove = tegra_eqos_remove,
+};
+
 static int dwc_eth_dwmac_probe(struct platform_device *pdev)
 {
+       const struct dwc_eth_dwmac_data *data;
        struct plat_stmmacenet_data *plat_dat;
        struct stmmac_resources stmmac_res;
        struct resource *res;
+       void *priv;
        int ret;
 
+       data = of_device_get_match_data(&pdev->dev);
+
        memset(&stmmac_res, 0, sizeof(struct stmmac_resources));
 
        /**
@@ -138,39 +452,26 @@ static int dwc_eth_dwmac_probe(struct pl
        if (IS_ERR(plat_dat))
                return PTR_ERR(plat_dat);
 
-       plat_dat->stmmac_clk = devm_clk_get(&pdev->dev, "apb_pclk");
-       if (IS_ERR(plat_dat->stmmac_clk)) {
-               dev_err(&pdev->dev, "apb_pclk clock not found.\n");
-               ret = PTR_ERR(plat_dat->stmmac_clk);
-               plat_dat->stmmac_clk = NULL;
-               goto err_remove_config_dt;
+       priv = data->probe(pdev, plat_dat, &stmmac_res);
+       if (IS_ERR(priv)) {
+               ret = PTR_ERR(priv);
+               dev_err(&pdev->dev, "failed to probe subdriver: %d\n", ret);
+               goto remove_config;
        }
-       clk_prepare_enable(plat_dat->stmmac_clk);
-
-       plat_dat->pclk = devm_clk_get(&pdev->dev, "phy_ref_clk");
-       if (IS_ERR(plat_dat->pclk)) {
-               dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
-               ret = PTR_ERR(plat_dat->pclk);
-               plat_dat->pclk = NULL;
-               goto err_out_clk_dis_phy;
-       }
-       clk_prepare_enable(plat_dat->pclk);
 
        ret = dwc_eth_dwmac_config_dt(pdev, plat_dat);
        if (ret)
-               goto err_out_clk_dis_aper;
+               goto remove;
 
        ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
        if (ret)
-               goto err_out_clk_dis_aper;
+               goto remove;
 
-       return 0;
+       return ret;
 
-err_out_clk_dis_aper:
-       clk_disable_unprepare(plat_dat->pclk);
-err_out_clk_dis_phy:
-       clk_disable_unprepare(plat_dat->stmmac_clk);
-err_remove_config_dt:
+remove:
+       data->remove(pdev);
+remove_config:
        stmmac_remove_config_dt(pdev, plat_dat);
 
        return ret;
@@ -178,11 +479,29 @@ err_remove_config_dt:
 
 static int dwc_eth_dwmac_remove(struct platform_device *pdev)
 {
-       return stmmac_pltfr_remove(pdev);
+       struct net_device *ndev = platform_get_drvdata(pdev);
+       struct stmmac_priv *priv = netdev_priv(ndev);
+       const struct dwc_eth_dwmac_data *data;
+       int err;
+
+       data = of_device_get_match_data(&pdev->dev);
+
+       err = stmmac_dvr_remove(&pdev->dev);
+       if (err < 0)
+               dev_err(&pdev->dev, "failed to remove platform: %d\n", err);
+
+       err = data->remove(pdev);
+       if (err < 0)
+               dev_err(&pdev->dev, "failed to remove subdriver: %d\n", err);
+
+       stmmac_remove_config_dt(pdev, priv->plat);
+
+       return err;
 }
 
 static const struct of_device_id dwc_eth_dwmac_match[] = {
-       { .compatible = "snps,dwc-qos-ethernet-4.10", },
+       { .compatible = "snps,dwc-qos-ethernet-4.10", .data = &dwc_qos_data },
+       { .compatible = "nvidia,tegra186-eqos", .data = &tegra_eqos_data },
        { }
 };
 MODULE_DEVICE_TABLE(of, dwc_eth_dwmac_match);
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -74,6 +74,10 @@ struct rk_priv_data {
 #define GRF_BIT(nr)    (BIT(nr) | BIT(nr+16))
 #define GRF_CLR_BIT(nr)        (BIT(nr+16))
 
+#define DELAY_ENABLE(soc, tx, rx) \
+       (((tx) ? soc##_GMAC_TXCLK_DLY_ENABLE : soc##_GMAC_TXCLK_DLY_DISABLE) | \
+        ((rx) ? soc##_GMAC_RXCLK_DLY_ENABLE : soc##_GMAC_RXCLK_DLY_DISABLE))
+
 #define RK3228_GRF_MAC_CON0    0x0900
 #define RK3228_GRF_MAC_CON1    0x0904
 
@@ -115,8 +119,7 @@ static void rk3228_set_to_rgmii(struct r
        regmap_write(bsp_priv->grf, RK3228_GRF_MAC_CON1,
                     RK3228_GMAC_PHY_INTF_SEL_RGMII |
                     RK3228_GMAC_RMII_MODE_CLR |
-                    RK3228_GMAC_RXCLK_DLY_ENABLE |
-                    RK3228_GMAC_TXCLK_DLY_ENABLE);
+                    DELAY_ENABLE(RK3228, tx_delay, rx_delay));
 
        regmap_write(bsp_priv->grf, RK3228_GRF_MAC_CON0,
                     RK3228_GMAC_CLK_RX_DL_CFG(rx_delay) |
@@ -232,8 +235,7 @@ static void rk3288_set_to_rgmii(struct r
                     RK3288_GMAC_PHY_INTF_SEL_RGMII |
                     RK3288_GMAC_RMII_MODE_CLR);
        regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON3,
-                    RK3288_GMAC_RXCLK_DLY_ENABLE |
-                    RK3288_GMAC_TXCLK_DLY_ENABLE |
+                    DELAY_ENABLE(RK3288, tx_delay, rx_delay) |
                     RK3288_GMAC_CLK_RX_DL_CFG(rx_delay) |
                     RK3288_GMAC_CLK_TX_DL_CFG(tx_delay));
 }
@@ -460,8 +462,7 @@ static void rk3366_set_to_rgmii(struct r
                     RK3366_GMAC_PHY_INTF_SEL_RGMII |
                     RK3366_GMAC_RMII_MODE_CLR);
        regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON7,
-                    RK3366_GMAC_RXCLK_DLY_ENABLE |
-                    RK3366_GMAC_TXCLK_DLY_ENABLE |
+                    DELAY_ENABLE(RK3366, tx_delay, rx_delay) |
                     RK3366_GMAC_CLK_RX_DL_CFG(rx_delay) |
                     RK3366_GMAC_CLK_TX_DL_CFG(tx_delay));
 }
@@ -572,8 +573,7 @@ static void rk3368_set_to_rgmii(struct r
                     RK3368_GMAC_PHY_INTF_SEL_RGMII |
                     RK3368_GMAC_RMII_MODE_CLR);
        regmap_write(bsp_priv->grf, RK3368_GRF_SOC_CON16,
-                    RK3368_GMAC_RXCLK_DLY_ENABLE |
-                    RK3368_GMAC_TXCLK_DLY_ENABLE |
+                    DELAY_ENABLE(RK3368, tx_delay, rx_delay) |
                     RK3368_GMAC_CLK_RX_DL_CFG(rx_delay) |
                     RK3368_GMAC_CLK_TX_DL_CFG(tx_delay));
 }
@@ -684,8 +684,7 @@ static void rk3399_set_to_rgmii(struct r
                     RK3399_GMAC_PHY_INTF_SEL_RGMII |
                     RK3399_GMAC_RMII_MODE_CLR);
        regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON6,
-                    RK3399_GMAC_RXCLK_DLY_ENABLE |
-                    RK3399_GMAC_TXCLK_DLY_ENABLE |
+                    DELAY_ENABLE(RK3399, tx_delay, rx_delay) |
                     RK3399_GMAC_CLK_RX_DL_CFG(rx_delay) |
                     RK3399_GMAC_CLK_TX_DL_CFG(tx_delay));
 }
@@ -985,14 +984,29 @@ static int rk_gmac_powerup(struct rk_pri
                return ret;
 
        /*rmii or rgmii*/
-       if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) {
+       switch (bsp_priv->phy_iface) {
+       case PHY_INTERFACE_MODE_RGMII:
                dev_info(dev, "init for RGMII\n");
                bsp_priv->ops->set_to_rgmii(bsp_priv, bsp_priv->tx_delay,
                                            bsp_priv->rx_delay);
-       } else if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII) {
+               break;
+       case PHY_INTERFACE_MODE_RGMII_ID:
+               dev_info(dev, "init for RGMII_ID\n");
+               bsp_priv->ops->set_to_rgmii(bsp_priv, 0, 0);
+               break;
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+               dev_info(dev, "init for RGMII_RXID\n");
+               bsp_priv->ops->set_to_rgmii(bsp_priv, bsp_priv->tx_delay, 0);
+               break;
+       case PHY_INTERFACE_MODE_RGMII_TXID:
+               dev_info(dev, "init for RGMII_TXID\n");
+               bsp_priv->ops->set_to_rgmii(bsp_priv, 0, bsp_priv->rx_delay);
+               break;
+       case PHY_INTERFACE_MODE_RMII:
                dev_info(dev, "init for RMII\n");
                bsp_priv->ops->set_to_rmii(bsp_priv);
-       } else {
+               break;
+       default:
                dev_err(dev, "NO interface defined!\n");
        }
 
@@ -1022,12 +1036,19 @@ static void rk_fix_speed(void *priv, uns
        struct rk_priv_data *bsp_priv = priv;
        struct device *dev = &bsp_priv->pdev->dev;
 
-       if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII)
+       switch (bsp_priv->phy_iface) {
+       case PHY_INTERFACE_MODE_RGMII:
+       case PHY_INTERFACE_MODE_RGMII_ID:
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+       case PHY_INTERFACE_MODE_RGMII_TXID:
                bsp_priv->ops->set_rgmii_speed(bsp_priv, speed);
-       else if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII)
+               break;
+       case PHY_INTERFACE_MODE_RMII:
                bsp_priv->ops->set_rmii_speed(bsp_priv, speed);
-       else
+               break;
+       default:
                dev_err(dev, "unsupported interface %d", bsp_priv->phy_iface);
+       }
 }
 
 static int rk_gmac_probe(struct platform_device *pdev)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -216,7 +216,8 @@ static void dwmac1000_set_filter(struct
 
 
 static void dwmac1000_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
-                               unsigned int fc, unsigned int pause_time)
+                               unsigned int fc, unsigned int pause_time,
+                               u32 tx_cnt)
 {
        void __iomem *ioaddr = hw->pcsr;
        /* Set flow such that DZPQ in Mac Register 6 is 0,
@@ -412,7 +413,8 @@ static void dwmac1000_get_adv_lp(void __
        dwmac_get_adv_lp(ioaddr, GMAC_PCS_BASE, adv);
 }
 
-static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x)
+static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+                           u32 rx_queues, u32 tx_queues)
 {
        u32 value = readl(ioaddr + GMAC_DEBUG);
 
@@ -488,6 +490,7 @@ static void dwmac1000_debug(void __iomem
 
 static const struct stmmac_ops dwmac1000_ops = {
        .core_init = dwmac1000_core_init,
+       .set_mac = stmmac_set_mac,
        .rx_ipc = dwmac1000_rx_ipc_enable,
        .dump_regs = dwmac1000_dump_regs,
        .host_irq_status = dwmac1000_irq_status,
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -247,7 +247,8 @@ static void dwmac1000_get_hw_feature(voi
        dma_cap->enh_desc = (hw_cap & DMA_HW_FEAT_ENHDESSEL) >> 24;
 }
 
-static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt)
+static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt,
+                                 u32 number_chan)
 {
        writel(riwt, ioaddr + DMA_RX_WATCHDOG);
 }
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -131,7 +131,8 @@ static void dwmac100_set_filter(struct m
 }
 
 static void dwmac100_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
-                              unsigned int fc, unsigned int pause_time)
+                              unsigned int fc, unsigned int pause_time,
+                              u32 tx_cnt)
 {
        void __iomem *ioaddr = hw->pcsr;
        unsigned int flow = MAC_FLOW_CTRL_ENABLE;
@@ -149,6 +150,7 @@ static void dwmac100_pmt(struct mac_devi
 
 static const struct stmmac_ops dwmac100_ops = {
        .core_init = dwmac100_core_init,
+       .set_mac = stmmac_set_mac,
        .rx_ipc = dwmac100_rx_ipc_enable,
        .dump_regs = dwmac100_dump_mac_regs,
        .host_irq_status = dwmac100_irq_status,
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -22,9 +22,15 @@
 #define GMAC_HASH_TAB_32_63            0x00000014
 #define GMAC_RX_FLOW_CTRL              0x00000090
 #define GMAC_QX_TX_FLOW_CTRL(x)                (0x70 + x * 4)
+#define GMAC_TXQ_PRTY_MAP0             0x98
+#define GMAC_TXQ_PRTY_MAP1             0x9C
 #define GMAC_RXQ_CTRL0                 0x000000a0
+#define GMAC_RXQ_CTRL1                 0x000000a4
+#define GMAC_RXQ_CTRL2                 0x000000a8
+#define GMAC_RXQ_CTRL3                 0x000000ac
 #define GMAC_INT_STATUS                        0x000000b0
 #define GMAC_INT_EN                    0x000000b4
+#define GMAC_1US_TIC_COUNTER           0x000000dc
 #define GMAC_PCS_BASE                  0x000000e0
 #define GMAC_PHYIF_CONTROL_STATUS      0x000000f8
 #define GMAC_PMT                       0x000000c0
@@ -38,6 +44,22 @@
 #define GMAC_ADDR_HIGH(reg)            (0x300 + reg * 8)
 #define GMAC_ADDR_LOW(reg)             (0x304 + reg * 8)
 
+/* RX Queues Routing */
+#define GMAC_RXQCTRL_AVCPQ_MASK                GENMASK(2, 0)
+#define GMAC_RXQCTRL_AVCPQ_SHIFT       0
+#define GMAC_RXQCTRL_PTPQ_MASK         GENMASK(6, 4)
+#define GMAC_RXQCTRL_PTPQ_SHIFT                4
+#define GMAC_RXQCTRL_DCBCPQ_MASK       GENMASK(10, 8)
+#define GMAC_RXQCTRL_DCBCPQ_SHIFT      8
+#define GMAC_RXQCTRL_UPQ_MASK          GENMASK(14, 12)
+#define GMAC_RXQCTRL_UPQ_SHIFT         12
+#define GMAC_RXQCTRL_MCBCQ_MASK                GENMASK(18, 16)
+#define GMAC_RXQCTRL_MCBCQ_SHIFT       16
+#define GMAC_RXQCTRL_MCBCQEN           BIT(20)
+#define GMAC_RXQCTRL_MCBCQEN_SHIFT     20
+#define GMAC_RXQCTRL_TACPQE            BIT(21)
+#define GMAC_RXQCTRL_TACPQE_SHIFT      21
+
 /* MAC Packet Filtering */
 #define GMAC_PACKET_FILTER_PR          BIT(0)
 #define GMAC_PACKET_FILTER_HMC         BIT(2)
@@ -53,6 +75,14 @@
 /* MAC Flow Control RX */
 #define GMAC_RX_FLOW_CTRL_RFE          BIT(0)
 
+/* RX Queues Priorities */
+#define GMAC_RXQCTRL_PSRQX_MASK(x)     GENMASK(7 + ((x) * 8), 0 + ((x) * 8))
+#define GMAC_RXQCTRL_PSRQX_SHIFT(x)    ((x) * 8)
+
+/* TX Queues Priorities */
+#define GMAC_TXQCTRL_PSTQX_MASK(x)     GENMASK(7 + ((x) * 8), 0 + ((x) * 8))
+#define GMAC_TXQCTRL_PSTQX_SHIFT(x)    ((x) * 8)
+
 /* MAC Flow Control TX */
 #define GMAC_TX_FLOW_CTRL_TFE          BIT(1)
 #define GMAC_TX_FLOW_CTRL_PT_SHIFT     16
@@ -148,6 +178,8 @@ enum power_event {
 /* MAC HW features1 bitmap */
 #define GMAC_HW_FEAT_AVSEL             BIT(20)
 #define GMAC_HW_TSOEN                  BIT(18)
+#define GMAC_HW_TXFIFOSIZE             GENMASK(10, 6)
+#define GMAC_HW_RXFIFOSIZE             GENMASK(4, 0)
 
 /* MAC HW features2 bitmap */
 #define GMAC_HW_FEAT_TXCHCNT           GENMASK(21, 18)
@@ -161,8 +193,25 @@ enum power_event {
 #define GMAC_HI_REG_AE                 BIT(31)
 
 /*  MTL registers */
+#define MTL_OPERATION_MODE             0x00000c00
+#define MTL_OPERATION_SCHALG_MASK      GENMASK(6, 5)
+#define MTL_OPERATION_SCHALG_WRR       (0x0 << 5)
+#define MTL_OPERATION_SCHALG_WFQ       (0x1 << 5)
+#define MTL_OPERATION_SCHALG_DWRR      (0x2 << 5)
+#define MTL_OPERATION_SCHALG_SP                (0x3 << 5)
+#define MTL_OPERATION_RAA              BIT(2)
+#define MTL_OPERATION_RAA_SP           (0x0 << 2)
+#define MTL_OPERATION_RAA_WSP          (0x1 << 2)
+
 #define MTL_INT_STATUS                 0x00000c20
-#define MTL_INT_Q0                     BIT(0)
+#define MTL_INT_QX(x)                  BIT(x)
+
+#define MTL_RXQ_DMA_MAP0               0x00000c30 /* queue 0 to 3 */
+#define MTL_RXQ_DMA_MAP1               0x00000c34 /* queue 4 to 7 */
+#define MTL_RXQ_DMA_Q04MDMACH_MASK     GENMASK(3, 0)
+#define MTL_RXQ_DMA_Q04MDMACH(x)       ((x) << 0)
+#define MTL_RXQ_DMA_QXMDMACH_MASK(x)   GENMASK(11 + (8 * ((x) - 1)), 8 * (x))
+#define MTL_RXQ_DMA_QXMDMACH(chan, q)  ((chan) << (8 * (q)))
 
 #define MTL_CHAN_BASE_ADDR             0x00000d00
 #define MTL_CHAN_BASE_OFFSET           0x40
@@ -180,6 +229,7 @@ enum power_event {
 #define MTL_OP_MODE_TSF                        BIT(1)
 
 #define MTL_OP_MODE_TQS_MASK           GENMASK(24, 16)
+#define MTL_OP_MODE_TQS_SHIFT          16
 
 #define MTL_OP_MODE_TTC_MASK           0x70
 #define MTL_OP_MODE_TTC_SHIFT          4
@@ -193,6 +243,17 @@ enum power_event {
 #define MTL_OP_MODE_TTC_384            (6 << MTL_OP_MODE_TTC_SHIFT)
 #define MTL_OP_MODE_TTC_512            (7 << MTL_OP_MODE_TTC_SHIFT)
 
+#define MTL_OP_MODE_RQS_MASK           GENMASK(29, 20)
+#define MTL_OP_MODE_RQS_SHIFT          20
+
+#define MTL_OP_MODE_RFD_MASK           GENMASK(19, 14)
+#define MTL_OP_MODE_RFD_SHIFT          14
+
+#define MTL_OP_MODE_RFA_MASK           GENMASK(13, 8)
+#define MTL_OP_MODE_RFA_SHIFT          8
+
+#define MTL_OP_MODE_EHFC               BIT(7)
+
 #define MTL_OP_MODE_RTC_MASK           0x18
 #define MTL_OP_MODE_RTC_SHIFT          3
 
@@ -201,6 +262,46 @@ enum power_event {
 #define MTL_OP_MODE_RTC_96             (2 << MTL_OP_MODE_RTC_SHIFT)
 #define MTL_OP_MODE_RTC_128            (3 << MTL_OP_MODE_RTC_SHIFT)
 
+/* MTL ETS Control register */
+#define MTL_ETS_CTRL_BASE_ADDR         0x00000d10
+#define MTL_ETS_CTRL_BASE_OFFSET       0x40
+#define MTL_ETSX_CTRL_BASE_ADDR(x)     (MTL_ETS_CTRL_BASE_ADDR + \
+                                       ((x) * MTL_ETS_CTRL_BASE_OFFSET))
+
+#define MTL_ETS_CTRL_CC                        BIT(3)
+#define MTL_ETS_CTRL_AVALG             BIT(2)
+
+/* MTL Queue Quantum Weight */
+#define MTL_TXQ_WEIGHT_BASE_ADDR       0x00000d18
+#define MTL_TXQ_WEIGHT_BASE_OFFSET     0x40
+#define MTL_TXQX_WEIGHT_BASE_ADDR(x)   (MTL_TXQ_WEIGHT_BASE_ADDR + \
+                                       ((x) * MTL_TXQ_WEIGHT_BASE_OFFSET))
+#define MTL_TXQ_WEIGHT_ISCQW_MASK      GENMASK(20, 0)
+
+/* MTL sendSlopeCredit register */
+#define MTL_SEND_SLP_CRED_BASE_ADDR    0x00000d1c
+#define MTL_SEND_SLP_CRED_OFFSET       0x40
+#define MTL_SEND_SLP_CREDX_BASE_ADDR(x)        (MTL_SEND_SLP_CRED_BASE_ADDR + \
+                                       ((x) * MTL_SEND_SLP_CRED_OFFSET))
+
+#define MTL_SEND_SLP_CRED_SSC_MASK     GENMASK(13, 0)
+
+/* MTL hiCredit register */
+#define MTL_HIGH_CRED_BASE_ADDR                0x00000d20
+#define MTL_HIGH_CRED_OFFSET           0x40
+#define MTL_HIGH_CREDX_BASE_ADDR(x)    (MTL_HIGH_CRED_BASE_ADDR + \
+                                       ((x) * MTL_HIGH_CRED_OFFSET))
+
+#define MTL_HIGH_CRED_HC_MASK          GENMASK(28, 0)
+
+/* MTL loCredit register */
+#define MTL_LOW_CRED_BASE_ADDR         0x00000d24
+#define MTL_LOW_CRED_OFFSET            0x40
+#define MTL_LOW_CREDX_BASE_ADDR(x)     (MTL_LOW_CRED_BASE_ADDR + \
+                                       ((x) * MTL_LOW_CRED_OFFSET))
+
+#define MTL_HIGH_CRED_LC_MASK          GENMASK(28, 0)
+
 /*  MTL debug */
 #define MTL_DEBUG_TXSTSFSTS            BIT(5)
 #define MTL_DEBUG_TXFSTS               BIT(4)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -59,17 +59,211 @@ static void dwmac4_core_init(struct mac_
        writel(value, ioaddr + GMAC_INT_EN);
 }
 
-static void dwmac4_rx_queue_enable(struct mac_device_info *hw, u32 queue)
+static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
+                                  u8 mode, u32 queue)
 {
        void __iomem *ioaddr = hw->pcsr;
        u32 value = readl(ioaddr + GMAC_RXQ_CTRL0);
 
        value &= GMAC_RX_QUEUE_CLEAR(queue);
-       value |= GMAC_RX_AV_QUEUE_ENABLE(queue);
+       if (mode == MTL_QUEUE_AVB)
+               value |= GMAC_RX_AV_QUEUE_ENABLE(queue);
+       else if (mode == MTL_QUEUE_DCB)
+               value |= GMAC_RX_DCB_QUEUE_ENABLE(queue);
 
        writel(value, ioaddr + GMAC_RXQ_CTRL0);
 }
 
+/* Assign priorities to an RX queue: RXQ_CTRL2 covers 0-3, RXQ_CTRL3 4-7. */
+static void dwmac4_rx_queue_priority(struct mac_device_info *hw,
+                                    u32 prio, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3;
+       u32 field = queue % 4;  /* 8-bit field index inside that register */
+       u32 value;
+
+       value = readl(ioaddr + base_register);
+
+       value &= ~GMAC_RXQCTRL_PSRQX_MASK(field);
+       value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(field)) &
+                                               GMAC_RXQCTRL_PSRQX_MASK(field);
+       writel(value, ioaddr + base_register);
+}
+
+/* Assign priorities to a TX queue: PRTY_MAP0 covers 0-3, PRTY_MAP1 4-7. */
+static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
+                                    u32 prio, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 field = queue % 4;  /* 8-bit field index inside the register */
+       u32 base_register;
+       u32 value;
+
+       base_register = (queue < 4) ? GMAC_TXQ_PRTY_MAP0 : GMAC_TXQ_PRTY_MAP1;
+       value = readl(ioaddr + base_register);
+
+       value &= ~GMAC_TXQCTRL_PSTQX_MASK(field);
+       value |= (prio << GMAC_TXQCTRL_PSTQX_SHIFT(field)) &
+                                               GMAC_TXQCTRL_PSTQX_MASK(field);
+       writel(value, ioaddr + base_register);
+}
+
+static void dwmac4_tx_queue_routing(struct mac_device_info *hw,
+                                   u8 packet, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value;
+
+       const struct stmmac_rx_routing route_possibilities[] = {
+               { GMAC_RXQCTRL_AVCPQ_MASK, GMAC_RXQCTRL_AVCPQ_SHIFT },
+               { GMAC_RXQCTRL_PTPQ_MASK, GMAC_RXQCTRL_PTPQ_SHIFT },
+               { GMAC_RXQCTRL_DCBCPQ_MASK, GMAC_RXQCTRL_DCBCPQ_SHIFT },
+               { GMAC_RXQCTRL_UPQ_MASK, GMAC_RXQCTRL_UPQ_SHIFT },
+               { GMAC_RXQCTRL_MCBCQ_MASK, GMAC_RXQCTRL_MCBCQ_SHIFT },
+       };
+
+       value = readl(ioaddr + GMAC_RXQ_CTRL1);
+
+       /* routing configuration */
+       value &= ~route_possibilities[packet - 1].reg_mask;
+       value |= (queue << route_possibilities[packet-1].reg_shift) &
+                route_possibilities[packet - 1].reg_mask;
+
+       /* some packets require extra ops */
+       if (packet == PACKET_AVCPQ) {
+               value &= ~GMAC_RXQCTRL_TACPQE;
+               value |= 0x1 << GMAC_RXQCTRL_TACPQE_SHIFT;
+       } else if (packet == PACKET_MCBCQ) {
+               value &= ~GMAC_RXQCTRL_MCBCQEN;
+               value |= 0x1 << GMAC_RXQCTRL_MCBCQEN_SHIFT;
+       }
+
+       writel(value, ioaddr + GMAC_RXQ_CTRL1);
+}
+
+static void dwmac4_prog_mtl_rx_algorithms(struct mac_device_info *hw,
+                                         u32 rx_alg)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value = readl(ioaddr + MTL_OPERATION_MODE);
+
+       value &= ~MTL_OPERATION_RAA;
+       switch (rx_alg) {
+       case MTL_RX_ALGORITHM_SP:
+               value |= MTL_OPERATION_RAA_SP;
+               break;
+       case MTL_RX_ALGORITHM_WSP:
+               value |= MTL_OPERATION_RAA_WSP;
+               break;
+       default:
+               break;
+       }
+
+       writel(value, ioaddr + MTL_OPERATION_MODE);
+}
+
+/* Program the MTL TX scheduling algorithm; unknown tx_alg selects WRR (0). */
+static void dwmac4_prog_mtl_tx_algorithms(struct mac_device_info *hw,
+                                         u32 tx_alg)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value = readl(ioaddr + MTL_OPERATION_MODE);
+
+       value &= ~MTL_OPERATION_SCHALG_MASK;
+       switch (tx_alg) {
+       case MTL_TX_ALGORITHM_WRR:
+               value |= MTL_OPERATION_SCHALG_WRR;
+               break;
+       case MTL_TX_ALGORITHM_WFQ:
+               value |= MTL_OPERATION_SCHALG_WFQ;
+               break;
+       case MTL_TX_ALGORITHM_DWRR:
+               value |= MTL_OPERATION_SCHALG_DWRR;
+               break;
+       case MTL_TX_ALGORITHM_SP:
+               value |= MTL_OPERATION_SCHALG_SP;
+               break;
+       }
+       writel(value, ioaddr + MTL_OPERATION_MODE);
+}
+
+static void dwmac4_set_mtl_tx_queue_weight(struct mac_device_info *hw,
+                                          u32 weight, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value = readl(ioaddr + MTL_TXQX_WEIGHT_BASE_ADDR(queue));
+
+       value &= ~MTL_TXQ_WEIGHT_ISCQW_MASK;
+       value |= weight & MTL_TXQ_WEIGHT_ISCQW_MASK;
+       writel(value, ioaddr + MTL_TXQX_WEIGHT_BASE_ADDR(queue));
+}
+
+/* Map an MTL RX queue to a DMA channel: MAP0 holds queues 0-3, MAP1 4-7. */
+static void dwmac4_map_mtl_dma(struct mac_device_info *hw, u32 queue, u32 chan)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 reg = (queue < 4) ? MTL_RXQ_DMA_MAP0 : MTL_RXQ_DMA_MAP1;
+       /*
+        * Field index within the selected register. The mask/shift macros
+        * must not be fed the absolute queue number for queues 5-7: that
+        * would place the field past bit 31 of the 32-bit register.
+        */
+       u32 field = queue % 4;
+       u32 value;
+
+       value = readl(ioaddr + reg);
+       if (field == 0) {
+               value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK;
+               value |= MTL_RXQ_DMA_Q04MDMACH(chan);
+       } else {
+               value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(field);
+               value |= MTL_RXQ_DMA_QXMDMACH(chan, field);
+       }
+       writel(value, ioaddr + reg);
+}
+
+static void dwmac4_config_cbs(struct mac_device_info *hw,
+                             u32 send_slope, u32 idle_slope,
+                             u32 high_credit, u32 low_credit, u32 queue)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 value;
+
+       pr_debug("Queue %d configured as AVB. Parameters:\n", queue);
+       pr_debug("\tsend_slope: 0x%08x\n", send_slope);
+       pr_debug("\tidle_slope: 0x%08x\n", idle_slope);
+       pr_debug("\thigh_credit: 0x%08x\n", high_credit);
+       pr_debug("\tlow_credit: 0x%08x\n", low_credit);
+
+       /* enable AV algorithm */
+       value = readl(ioaddr + MTL_ETSX_CTRL_BASE_ADDR(queue));
+       value |= MTL_ETS_CTRL_AVALG;
+       value |= MTL_ETS_CTRL_CC;
+       writel(value, ioaddr + MTL_ETSX_CTRL_BASE_ADDR(queue));
+
+       /* configure send slope */
+       value = readl(ioaddr + MTL_SEND_SLP_CREDX_BASE_ADDR(queue));
+       value &= ~MTL_SEND_SLP_CRED_SSC_MASK;
+       value |= send_slope & MTL_SEND_SLP_CRED_SSC_MASK;
+       writel(value, ioaddr + MTL_SEND_SLP_CREDX_BASE_ADDR(queue));
+
+       /* configure idle slope (same register as tx weight) */
+       dwmac4_set_mtl_tx_queue_weight(hw, idle_slope, queue);
+
+       /* configure high credit */
+       value = readl(ioaddr + MTL_HIGH_CREDX_BASE_ADDR(queue));
+       value &= ~MTL_HIGH_CRED_HC_MASK;
+       value |= high_credit & MTL_HIGH_CRED_HC_MASK;
+       writel(value, ioaddr + MTL_HIGH_CREDX_BASE_ADDR(queue));
+
+       /* configure low credit */
+       value = readl(ioaddr + MTL_LOW_CREDX_BASE_ADDR(queue));
+       value &= ~MTL_HIGH_CRED_LC_MASK;
+       value |= low_credit & MTL_HIGH_CRED_LC_MASK;
+       writel(value, ioaddr + MTL_LOW_CREDX_BASE_ADDR(queue));
+}
+
 static void dwmac4_dump_regs(struct mac_device_info *hw, u32 *reg_space)
 {
        void __iomem *ioaddr = hw->pcsr;
@@ -251,11 +445,12 @@ static void dwmac4_set_filter(struct mac
 }
 
 static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
-                            unsigned int fc, unsigned int pause_time)
+                            unsigned int fc, unsigned int pause_time,
+                            u32 tx_cnt)
 {
        void __iomem *ioaddr = hw->pcsr;
-       u32 channel = STMMAC_CHAN0;     /* FIXME */
        unsigned int flow = 0;
+       u32 queue = 0;
 
        pr_debug("GMAC Flow-Control:\n");
        if (fc & FLOW_RX) {
@@ -265,13 +460,18 @@ static void dwmac4_flow_ctrl(struct mac_
        }
        if (fc & FLOW_TX) {
                pr_debug("\tTransmit Flow-Control ON\n");
-               flow |= GMAC_TX_FLOW_CTRL_TFE;
-               writel(flow, ioaddr + GMAC_QX_TX_FLOW_CTRL(channel));
 
-               if (duplex) {
+               if (duplex)
                        pr_debug("\tduplex mode: PAUSE %d\n", pause_time);
-                       flow |= (pause_time << GMAC_TX_FLOW_CTRL_PT_SHIFT);
-                       writel(flow, ioaddr + GMAC_QX_TX_FLOW_CTRL(channel));
+
+               for (queue = 0; queue < tx_cnt; queue++) {
+                       flow |= GMAC_TX_FLOW_CTRL_TFE;
+
+                       if (duplex)
+                               flow |=
+                               (pause_time << GMAC_TX_FLOW_CTRL_PT_SHIFT);
+
+                       writel(flow, ioaddr + GMAC_QX_TX_FLOW_CTRL(queue));
                }
        }
 }
@@ -325,11 +525,34 @@ static void dwmac4_phystatus(void __iome
        }
 }
 
+static int dwmac4_irq_mtl_status(struct mac_device_info *hw, u32 chan)
+{
+       void __iomem *ioaddr = hw->pcsr;
+       u32 mtl_int_qx_status;
+       int ret = 0;
+
+       mtl_int_qx_status = readl(ioaddr + MTL_INT_STATUS);
+
+       /* Check MTL Interrupt */
+       if (mtl_int_qx_status & MTL_INT_QX(chan)) {
+               /* read Queue x Interrupt status */
+               u32 status = readl(ioaddr + MTL_CHAN_INT_CTRL(chan));
+
+               if (status & MTL_RX_OVERFLOW_INT) {
+                       /*  clear Interrupt */
+                       writel(status | MTL_RX_OVERFLOW_INT,
+                              ioaddr + MTL_CHAN_INT_CTRL(chan));
+                       ret = CORE_IRQ_MTL_RX_OVERFLOW;
+               }
+       }
+
+       return ret;
+}
+
 static int dwmac4_irq_status(struct mac_device_info *hw,
                             struct stmmac_extra_stats *x)
 {
        void __iomem *ioaddr = hw->pcsr;
-       u32 mtl_int_qx_status;
        u32 intr_status;
        int ret = 0;
 
@@ -348,20 +571,6 @@ static int dwmac4_irq_status(struct mac_
                x->irq_receive_pmt_irq_n++;
        }
 
-       mtl_int_qx_status = readl(ioaddr + MTL_INT_STATUS);
-       /* Check MTL Interrupt: Currently only one queue is used: Q0. */
-       if (mtl_int_qx_status & MTL_INT_Q0) {
-               /* read Queue 0 Interrupt status */
-               u32 status = readl(ioaddr + MTL_CHAN_INT_CTRL(STMMAC_CHAN0));
-
-               if (status & MTL_RX_OVERFLOW_INT) {
-                       /*  clear Interrupt */
-                       writel(status | MTL_RX_OVERFLOW_INT,
-                              ioaddr + MTL_CHAN_INT_CTRL(STMMAC_CHAN0));
-                       ret = CORE_IRQ_MTL_RX_OVERFLOW;
-               }
-       }
-
        dwmac_pcs_isr(ioaddr, GMAC_PCS_BASE, intr_status, x);
        if (intr_status & PCS_RGSMIIIS_IRQ)
                dwmac4_phystatus(ioaddr, x);
@@ -369,64 +578,69 @@ static int dwmac4_irq_status(struct mac_
        return ret;
 }
 
-static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x)
+static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+                        u32 rx_queues, u32 tx_queues)
 {
        u32 value;
+       u32 queue;
 
-       /*  Currently only channel 0 is supported */
-       value = readl(ioaddr + MTL_CHAN_TX_DEBUG(STMMAC_CHAN0));
+       for (queue = 0; queue < tx_queues; queue++) {
+               value = readl(ioaddr + MTL_CHAN_TX_DEBUG(queue));
 
-       if (value & MTL_DEBUG_TXSTSFSTS)
-               x->mtl_tx_status_fifo_full++;
-       if (value & MTL_DEBUG_TXFSTS)
-               x->mtl_tx_fifo_not_empty++;
-       if (value & MTL_DEBUG_TWCSTS)
-               x->mmtl_fifo_ctrl++;
-       if (value & MTL_DEBUG_TRCSTS_MASK) {
-               u32 trcsts = (value & MTL_DEBUG_TRCSTS_MASK)
-                            >> MTL_DEBUG_TRCSTS_SHIFT;
-               if (trcsts == MTL_DEBUG_TRCSTS_WRITE)
-                       x->mtl_tx_fifo_read_ctrl_write++;
-               else if (trcsts == MTL_DEBUG_TRCSTS_TXW)
-                       x->mtl_tx_fifo_read_ctrl_wait++;
-               else if (trcsts == MTL_DEBUG_TRCSTS_READ)
-                       x->mtl_tx_fifo_read_ctrl_read++;
-               else
-                       x->mtl_tx_fifo_read_ctrl_idle++;
+               if (value & MTL_DEBUG_TXSTSFSTS)
+                       x->mtl_tx_status_fifo_full++;
+               if (value & MTL_DEBUG_TXFSTS)
+                       x->mtl_tx_fifo_not_empty++;
+               if (value & MTL_DEBUG_TWCSTS)
+                       x->mmtl_fifo_ctrl++;
+               if (value & MTL_DEBUG_TRCSTS_MASK) {
+                       u32 trcsts = (value & MTL_DEBUG_TRCSTS_MASK)
+                                    >> MTL_DEBUG_TRCSTS_SHIFT;
+                       if (trcsts == MTL_DEBUG_TRCSTS_WRITE)
+                               x->mtl_tx_fifo_read_ctrl_write++;
+                       else if (trcsts == MTL_DEBUG_TRCSTS_TXW)
+                               x->mtl_tx_fifo_read_ctrl_wait++;
+                       else if (trcsts == MTL_DEBUG_TRCSTS_READ)
+                               x->mtl_tx_fifo_read_ctrl_read++;
+                       else
+                               x->mtl_tx_fifo_read_ctrl_idle++;
+               }
+               if (value & MTL_DEBUG_TXPAUSED)
+                       x->mac_tx_in_pause++;
        }
-       if (value & MTL_DEBUG_TXPAUSED)
-               x->mac_tx_in_pause++;
 
-       value = readl(ioaddr + MTL_CHAN_RX_DEBUG(STMMAC_CHAN0));
+       for (queue = 0; queue < rx_queues; queue++) {
+               value = readl(ioaddr + MTL_CHAN_RX_DEBUG(queue));
 
-       if (value & MTL_DEBUG_RXFSTS_MASK) {
-               u32 rxfsts = (value & MTL_DEBUG_RXFSTS_MASK)
-                            >> MTL_DEBUG_RRCSTS_SHIFT;
-
-               if (rxfsts == MTL_DEBUG_RXFSTS_FULL)
-                       x->mtl_rx_fifo_fill_level_full++;
-               else if (rxfsts == MTL_DEBUG_RXFSTS_AT)
-                       x->mtl_rx_fifo_fill_above_thresh++;
-               else if (rxfsts == MTL_DEBUG_RXFSTS_BT)
-                       x->mtl_rx_fifo_fill_below_thresh++;
-               else
-                       x->mtl_rx_fifo_fill_level_empty++;
-       }
-       if (value & MTL_DEBUG_RRCSTS_MASK) {
-               u32 rrcsts = (value & MTL_DEBUG_RRCSTS_MASK) >>
-                            MTL_DEBUG_RRCSTS_SHIFT;
-
-               if (rrcsts == MTL_DEBUG_RRCSTS_FLUSH)
-                       x->mtl_rx_fifo_read_ctrl_flush++;
-               else if (rrcsts == MTL_DEBUG_RRCSTS_RSTAT)
-                       x->mtl_rx_fifo_read_ctrl_read_data++;
-               else if (rrcsts == MTL_DEBUG_RRCSTS_RDATA)
-                       x->mtl_rx_fifo_read_ctrl_status++;
-               else
-                       x->mtl_rx_fifo_read_ctrl_idle++;
+               if (value & MTL_DEBUG_RXFSTS_MASK) {
+                       u32 rxfsts = (value & MTL_DEBUG_RXFSTS_MASK)
+                                    >> MTL_DEBUG_RRCSTS_SHIFT;
+
+                       if (rxfsts == MTL_DEBUG_RXFSTS_FULL)
+                               x->mtl_rx_fifo_fill_level_full++;
+                       else if (rxfsts == MTL_DEBUG_RXFSTS_AT)
+                               x->mtl_rx_fifo_fill_above_thresh++;
+                       else if (rxfsts == MTL_DEBUG_RXFSTS_BT)
+                               x->mtl_rx_fifo_fill_below_thresh++;
+                       else
+                               x->mtl_rx_fifo_fill_level_empty++;
+               }
+               if (value & MTL_DEBUG_RRCSTS_MASK) {
+                       u32 rrcsts = (value & MTL_DEBUG_RRCSTS_MASK) >>
+                                    MTL_DEBUG_RRCSTS_SHIFT;
+
+                       if (rrcsts == MTL_DEBUG_RRCSTS_FLUSH)
+                               x->mtl_rx_fifo_read_ctrl_flush++;
+                       else if (rrcsts == MTL_DEBUG_RRCSTS_RSTAT)
+                               x->mtl_rx_fifo_read_ctrl_read_data++;
+                       else if (rrcsts == MTL_DEBUG_RRCSTS_RDATA)
+                               x->mtl_rx_fifo_read_ctrl_status++;
+                       else
+                               x->mtl_rx_fifo_read_ctrl_idle++;
+               }
+               if (value & MTL_DEBUG_RWCSTS)
+                       x->mtl_rx_fifo_ctrl_active++;
        }
-       if (value & MTL_DEBUG_RWCSTS)
-               x->mtl_rx_fifo_ctrl_active++;
 
        /* GMAC debug */
        value = readl(ioaddr + GMAC_DEBUG);
@@ -455,10 +669,51 @@ static void dwmac4_debug(void __iomem *i
 
 static const struct stmmac_ops dwmac4_ops = {
        .core_init = dwmac4_core_init,
+       .set_mac = stmmac_set_mac,
        .rx_ipc = dwmac4_rx_ipc_enable,
        .rx_queue_enable = dwmac4_rx_queue_enable,
+       .rx_queue_prio = dwmac4_rx_queue_priority,
+       .tx_queue_prio = dwmac4_tx_queue_priority,
+       .rx_queue_routing = dwmac4_tx_queue_routing,
+       .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
+       .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
+       .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
+       .map_mtl_to_dma = dwmac4_map_mtl_dma,
+       .config_cbs = dwmac4_config_cbs,
        .dump_regs = dwmac4_dump_regs,
        .host_irq_status = dwmac4_irq_status,
+       .host_mtl_irq_status = dwmac4_irq_mtl_status,
+       .flow_ctrl = dwmac4_flow_ctrl,
+       .pmt = dwmac4_pmt,
+       .set_umac_addr = dwmac4_set_umac_addr,
+       .get_umac_addr = dwmac4_get_umac_addr,
+       .set_eee_mode = dwmac4_set_eee_mode,
+       .reset_eee_mode = dwmac4_reset_eee_mode,
+       .set_eee_timer = dwmac4_set_eee_timer,
+       .set_eee_pls = dwmac4_set_eee_pls,
+       .pcs_ctrl_ane = dwmac4_ctrl_ane,
+       .pcs_rane = dwmac4_rane,
+       .pcs_get_adv_lp = dwmac4_get_adv_lp,
+       .debug = dwmac4_debug,
+       .set_filter = dwmac4_set_filter,
+};
+
+static const struct stmmac_ops dwmac410_ops = {
+       .core_init = dwmac4_core_init,
+       .set_mac = stmmac_dwmac4_set_mac,
+       .rx_ipc = dwmac4_rx_ipc_enable,
+       .rx_queue_enable = dwmac4_rx_queue_enable,
+       .rx_queue_prio = dwmac4_rx_queue_priority,
+       .tx_queue_prio = dwmac4_tx_queue_priority,
+       .rx_queue_routing = dwmac4_tx_queue_routing,
+       .prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
+       .prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
+       .set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
+       .map_mtl_to_dma = dwmac4_map_mtl_dma,
+       .config_cbs = dwmac4_config_cbs,
+       .dump_regs = dwmac4_dump_regs,
+       .host_irq_status = dwmac4_irq_status,
+       .host_mtl_irq_status = dwmac4_irq_mtl_status,
        .flow_ctrl = dwmac4_flow_ctrl,
        .pmt = dwmac4_pmt,
        .set_umac_addr = dwmac4_set_umac_addr,
@@ -492,8 +747,6 @@ struct mac_device_info *dwmac4_setup(voi
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
-       mac->mac = &dwmac4_ops;
-
        mac->link.port = GMAC_CONFIG_PS;
        mac->link.duplex = GMAC_CONFIG_DM;
        mac->link.speed = GMAC_CONFIG_FES;
@@ -514,5 +767,10 @@ struct mac_device_info *dwmac4_setup(voi
        else
                mac->dma = &dwmac4_dma_ops;
 
+       if (*synopsys_id >= DWMAC_CORE_4_00)
+               mac->mac = &dwmac410_ops;
+       else
+               mac->mac = &dwmac4_ops;
+
        return mac;
 }
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -214,13 +214,13 @@ static int dwmac4_wrback_get_tx_timestam
 {
        /* Context type from W/B descriptor must be zero */
        if (le32_to_cpu(p->des3) & TDES3_CONTEXT_TYPE)
-               return -EINVAL;
+               return 0;
 
        /* Tx Timestamp Status is 1 so des0 and des1'll have valid values */
        if (le32_to_cpu(p->des3) & TDES3_TIMESTAMP_STATUS)
-               return 0;
+               return 1;
 
-       return 1;
+       return 0;
 }
 
 static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
@@ -282,7 +282,10 @@ static int dwmac4_wrback_get_rx_timestam
                }
        }
 exit:
-       return ret;
+       if (likely(ret == 0))
+               return 1;
+
+       return 0;
 }
 
 static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
@@ -304,12 +307,13 @@ static void dwmac4_rd_init_tx_desc(struc
 
 static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
                                      bool csum_flag, int mode, bool tx_own,
-                                     bool ls)
+                                     bool ls, unsigned int tot_pkt_len)
 {
        unsigned int tdes3 = le32_to_cpu(p->des3);
 
        p->des2 |= cpu_to_le32(len & TDES2_BUFFER1_SIZE_MASK);
 
+       tdes3 |= tot_pkt_len & TDES3_PACKET_SIZE_MASK;
        if (is_fs)
                tdes3 |= TDES3_FIRST_DESCRIPTOR;
        else
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -71,36 +71,48 @@ static void dwmac4_dma_axi(void __iomem
        writel(value, ioaddr + DMA_SYS_BUS_MODE);
 }
 
-static void dwmac4_dma_init_channel(void __iomem *ioaddr,
-                                   struct stmmac_dma_cfg *dma_cfg,
-                                   u32 dma_tx_phy, u32 dma_rx_phy,
-                                   u32 channel)
+void dwmac4_dma_init_rx_chan(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg,
+                            u32 dma_rx_phy, u32 chan)
 {
        u32 value;
-       int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
-       int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
+       u32 rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
 
-       /* set PBL for each channels. Currently we affect same configuration
-        * on each channel
-        */
-       value = readl(ioaddr + DMA_CHAN_CONTROL(channel));
-       if (dma_cfg->pblx8)
-               value = value | DMA_BUS_MODE_PBL;
-       writel(value, ioaddr + DMA_CHAN_CONTROL(channel));
+       value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
+       value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
+       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
+
+       writel(dma_rx_phy, ioaddr + DMA_CHAN_RX_BASE_ADDR(chan));
+}
 
-       value = readl(ioaddr + DMA_CHAN_TX_CONTROL(channel));
+void dwmac4_dma_init_tx_chan(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg,
+                            u32 dma_tx_phy, u32 chan)
+{
+       u32 value;
+       u32 txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
+
+       value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
        value = value | (txpbl << DMA_BUS_MODE_PBL_SHIFT);
-       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(channel));
+       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
-       value = readl(ioaddr + DMA_CHAN_RX_CONTROL(channel));
-       value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
-       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(channel));
+       writel(dma_tx_phy, ioaddr + DMA_CHAN_TX_BASE_ADDR(chan));
+}
 
-       /* Mask interrupts by writing to CSR7 */
-       writel(DMA_CHAN_INTR_DEFAULT_MASK, ioaddr + DMA_CHAN_INTR_ENA(channel));
+void dwmac4_dma_init_channel(void __iomem *ioaddr,
+                            struct stmmac_dma_cfg *dma_cfg, u32 chan)
+{
+       u32 value;
+
+       /* common channel control register config */
+       value = readl(ioaddr + DMA_CHAN_CONTROL(chan));
+       if (dma_cfg->pblx8)
+               value = value | DMA_BUS_MODE_PBL;
+       writel(value, ioaddr + DMA_CHAN_CONTROL(chan));
 
-       writel(dma_tx_phy, ioaddr + DMA_CHAN_TX_BASE_ADDR(channel));
-       writel(dma_rx_phy, ioaddr + DMA_CHAN_RX_BASE_ADDR(channel));
+       /* Mask interrupts by writing to CSR7 */
+       writel(DMA_CHAN_INTR_DEFAULT_MASK,
+              ioaddr + DMA_CHAN_INTR_ENA(chan));
 }
 
 static void dwmac4_dma_init(void __iomem *ioaddr,
@@ -108,7 +120,6 @@ static void dwmac4_dma_init(void __iomem
                            u32 dma_tx, u32 dma_rx, int atds)
 {
        u32 value = readl(ioaddr + DMA_SYS_BUS_MODE);
-       int i;
 
        /* Set the Fixed burst mode */
        if (dma_cfg->fixed_burst)
@@ -122,9 +133,6 @@ static void dwmac4_dma_init(void __iomem
                value |= DMA_SYS_BUS_AAL;
 
        writel(value, ioaddr + DMA_SYS_BUS_MODE);
-
-       for (i = 0; i < DMA_CHANNEL_NB_MAX; i++)
-               dwmac4_dma_init_channel(ioaddr, dma_cfg, dma_tx, dma_rx, i);
 }
 
 static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel,
@@ -174,46 +182,121 @@ static void dwmac4_dump_dma_regs(void __
                _dwmac4_dump_dma_regs(ioaddr, i, reg_space);
 }
 
-static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt)
+static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan)
 {
-       int i;
+       u32 chan;
 
-       for (i = 0; i < DMA_CHANNEL_NB_MAX; i++)
-               writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(i));
+       for (chan = 0; chan < number_chan; chan++)
+               writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(chan));
 }
 
-static void dwmac4_dma_chan_op_mode(void __iomem *ioaddr, int txmode,
-                                   int rxmode, u32 channel)
+static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
+                                      u32 channel, int fifosz)
 {
-       u32 mtl_tx_op, mtl_rx_op, mtl_rx_int;
+       unsigned int rqs = fifosz / 256 - 1;
+       u32 mtl_rx_op, mtl_rx_int;
 
-       /* Following code only done for channel 0, other channels not yet
-        * supported.
-        */
-       mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+       mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(channel));
+
+       if (mode == SF_DMA_MODE) {
+               pr_debug("GMAC: enable RX store and forward mode\n");
+               mtl_rx_op |= MTL_OP_MODE_RSF;
+       } else {
+               pr_debug("GMAC: disable RX SF mode (threshold %d)\n", mode);
+               mtl_rx_op &= ~MTL_OP_MODE_RSF;
+               mtl_rx_op &= ~MTL_OP_MODE_RTC_MASK;
+               if (mode <= 32)
+                       mtl_rx_op |= MTL_OP_MODE_RTC_32;
+               else if (mode <= 64)
+                       mtl_rx_op |= MTL_OP_MODE_RTC_64;
+               else if (mode <= 96)
+                       mtl_rx_op |= MTL_OP_MODE_RTC_96;
+               else
+                       mtl_rx_op |= MTL_OP_MODE_RTC_128;
+       }
+
+       mtl_rx_op &= ~MTL_OP_MODE_RQS_MASK;
+       mtl_rx_op |= rqs << MTL_OP_MODE_RQS_SHIFT;
+
+       /* enable flow control only if each channel gets 4 KiB or more FIFO */
+       if (fifosz >= 4096) {
+               unsigned int rfd, rfa;
+
+               mtl_rx_op |= MTL_OP_MODE_EHFC;
+
+               /* Set Threshold for Activating Flow Control to min 2 frames,
+                * i.e. 1500 * 2 = 3000 bytes.
+                *
+                * Set Threshold for Deactivating Flow Control to min 1 frame,
+                * i.e. 1500 bytes.
+                */
+               switch (fifosz) {
+               case 4096:
+                       /* This violates the above formula because of FIFO size
+                        * limit therefore overflow may occur in spite of this.
+                        */
+                       rfd = 0x03; /* Full-2.5K */
+                       rfa = 0x01; /* Full-1.5K */
+                       break;
+
+               case 8192:
+                       rfd = 0x06; /* Full-4K */
+                       rfa = 0x0a; /* Full-6K */
+                       break;
+
+               case 16384:
+                       rfd = 0x06; /* Full-4K */
+                       rfa = 0x12; /* Full-10K */
+                       break;
+
+               default:
+                       rfd = 0x06; /* Full-4K */
+                       rfa = 0x1e; /* Full-16K */
+                       break;
+               }
 
-       if (txmode == SF_DMA_MODE) {
+               mtl_rx_op &= ~MTL_OP_MODE_RFD_MASK;
+               mtl_rx_op |= rfd << MTL_OP_MODE_RFD_SHIFT;
+
+               mtl_rx_op &= ~MTL_OP_MODE_RFA_MASK;
+               mtl_rx_op |= rfa << MTL_OP_MODE_RFA_SHIFT;
+       }
+
+       writel(mtl_rx_op, ioaddr + MTL_CHAN_RX_OP_MODE(channel));
+
+       /* Enable MTL RX overflow */
+       mtl_rx_int = readl(ioaddr + MTL_CHAN_INT_CTRL(channel));
+       writel(mtl_rx_int | MTL_RX_OVERFLOW_INT_EN,
+              ioaddr + MTL_CHAN_INT_CTRL(channel));
+}
+
+static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
+                                      u32 channel)
+{
+       u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+
+       if (mode == SF_DMA_MODE) {
                pr_debug("GMAC: enable TX store and forward mode\n");
                /* Transmit COE type 2 cannot be done in cut-through mode. */
                mtl_tx_op |= MTL_OP_MODE_TSF;
        } else {
-               pr_debug("GMAC: disabling TX SF (threshold %d)\n", txmode);
+               pr_debug("GMAC: disabling TX SF (threshold %d)\n", mode);
                mtl_tx_op &= ~MTL_OP_MODE_TSF;
                mtl_tx_op &= MTL_OP_MODE_TTC_MASK;
                /* Set the transmit threshold */
-               if (txmode <= 32)
+               if (mode <= 32)
                        mtl_tx_op |= MTL_OP_MODE_TTC_32;
-               else if (txmode <= 64)
+               else if (mode <= 64)
                        mtl_tx_op |= MTL_OP_MODE_TTC_64;
-               else if (txmode <= 96)
+               else if (mode <= 96)
                        mtl_tx_op |= MTL_OP_MODE_TTC_96;
-               else if (txmode <= 128)
+               else if (mode <= 128)
                        mtl_tx_op |= MTL_OP_MODE_TTC_128;
-               else if (txmode <= 192)
+               else if (mode <= 192)
                        mtl_tx_op |= MTL_OP_MODE_TTC_192;
-               else if (txmode <= 256)
+               else if (mode <= 256)
                        mtl_tx_op |= MTL_OP_MODE_TTC_256;
-               else if (txmode <= 384)
+               else if (mode <= 384)
                        mtl_tx_op |= MTL_OP_MODE_TTC_384;
                else
                        mtl_tx_op |= MTL_OP_MODE_TTC_512;
@@ -230,39 +313,6 @@ static void dwmac4_dma_chan_op_mode(void
         */
        mtl_tx_op |= MTL_OP_MODE_TXQEN | MTL_OP_MODE_TQS_MASK;
        writel(mtl_tx_op, ioaddr +  MTL_CHAN_TX_OP_MODE(channel));
-
-       mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(channel));
-
-       if (rxmode == SF_DMA_MODE) {
-               pr_debug("GMAC: enable RX store and forward mode\n");
-               mtl_rx_op |= MTL_OP_MODE_RSF;
-       } else {
-               pr_debug("GMAC: disable RX SF mode (threshold %d)\n", rxmode);
-               mtl_rx_op &= ~MTL_OP_MODE_RSF;
-               mtl_rx_op &= MTL_OP_MODE_RTC_MASK;
-               if (rxmode <= 32)
-                       mtl_rx_op |= MTL_OP_MODE_RTC_32;
-               else if (rxmode <= 64)
-                       mtl_rx_op |= MTL_OP_MODE_RTC_64;
-               else if (rxmode <= 96)
-                       mtl_rx_op |= MTL_OP_MODE_RTC_96;
-               else
-                       mtl_rx_op |= MTL_OP_MODE_RTC_128;
-       }
-
-       writel(mtl_rx_op, ioaddr + MTL_CHAN_RX_OP_MODE(channel));
-
-       /* Enable MTL RX overflow */
-       mtl_rx_int = readl(ioaddr + MTL_CHAN_INT_CTRL(channel));
-       writel(mtl_rx_int | MTL_RX_OVERFLOW_INT_EN,
-              ioaddr + MTL_CHAN_INT_CTRL(channel));
-}
-
-static void dwmac4_dma_operation_mode(void __iomem *ioaddr, int txmode,
-                                     int rxmode, int rxfifosz)
-{
-       /* Only Channel 0 is actually configured and used */
-       dwmac4_dma_chan_op_mode(ioaddr, txmode, rxmode, 0);
 }
 
 static void dwmac4_get_hw_feature(void __iomem *ioaddr,
@@ -294,6 +344,11 @@ static void dwmac4_get_hw_feature(void _
        hw_cap = readl(ioaddr + GMAC_HW_FEATURE1);
        dma_cap->av = (hw_cap & GMAC_HW_FEAT_AVSEL) >> 20;
        dma_cap->tsoen = (hw_cap & GMAC_HW_TSOEN) >> 18;
+       /* RX and TX FIFO sizes are encoded as log2(n / 128). Undo that by
+        * shifting and store the sizes in bytes.
+        */
+       dma_cap->tx_fifo_size = 128 << ((hw_cap & GMAC_HW_TXFIFOSIZE) >> 6);
+       dma_cap->rx_fifo_size = 128 << ((hw_cap & GMAC_HW_RXFIFOSIZE) >> 0);
        /* MAC HW feature2 */
        hw_cap = readl(ioaddr + GMAC_HW_FEATURE2);
        /* TX and RX number of channels */
@@ -332,9 +387,13 @@ static void dwmac4_enable_tso(void __iom
 const struct stmmac_dma_ops dwmac4_dma_ops = {
        .reset = dwmac4_dma_reset,
        .init = dwmac4_dma_init,
+       .init_chan = dwmac4_dma_init_channel,
+       .init_rx_chan = dwmac4_dma_init_rx_chan,
+       .init_tx_chan = dwmac4_dma_init_tx_chan,
        .axi = dwmac4_dma_axi,
        .dump_regs = dwmac4_dump_dma_regs,
-       .dma_mode = dwmac4_dma_operation_mode,
+       .dma_rx_mode = dwmac4_dma_rx_chan_op_mode,
+       .dma_tx_mode = dwmac4_dma_tx_chan_op_mode,
        .enable_dma_irq = dwmac4_enable_dma_irq,
        .disable_dma_irq = dwmac4_disable_dma_irq,
        .start_tx = dwmac4_dma_start_tx,
@@ -354,9 +413,13 @@ const struct stmmac_dma_ops dwmac4_dma_o
 const struct stmmac_dma_ops dwmac410_dma_ops = {
        .reset = dwmac4_dma_reset,
        .init = dwmac4_dma_init,
+       .init_chan = dwmac4_dma_init_channel,
+       .init_rx_chan = dwmac4_dma_init_rx_chan,
+       .init_tx_chan = dwmac4_dma_init_tx_chan,
        .axi = dwmac4_dma_axi,
        .dump_regs = dwmac4_dump_dma_regs,
-       .dma_mode = dwmac4_dma_operation_mode,
+       .dma_rx_mode = dwmac4_dma_rx_chan_op_mode,
+       .dma_tx_mode = dwmac4_dma_tx_chan_op_mode,
        .enable_dma_irq = dwmac410_enable_dma_irq,
        .disable_dma_irq = dwmac4_disable_dma_irq,
        .start_tx = dwmac4_dma_start_tx,
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -185,17 +185,17 @@
 
 int dwmac4_dma_reset(void __iomem *ioaddr);
 void dwmac4_enable_dma_transmission(void __iomem *ioaddr, u32 tail_ptr);
-void dwmac4_enable_dma_irq(void __iomem *ioaddr);
-void dwmac410_enable_dma_irq(void __iomem *ioaddr);
-void dwmac4_disable_dma_irq(void __iomem *ioaddr);
-void dwmac4_dma_start_tx(void __iomem *ioaddr);
-void dwmac4_dma_stop_tx(void __iomem *ioaddr);
-void dwmac4_dma_start_rx(void __iomem *ioaddr);
-void dwmac4_dma_stop_rx(void __iomem *ioaddr);
+void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan);
+void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan);
+void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan);
+void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan);
 int dwmac4_dma_interrupt(void __iomem *ioaddr,
-                        struct stmmac_extra_stats *x);
-void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len);
-void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len);
+                        struct stmmac_extra_stats *x, u32 chan);
+void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan);
+void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan);
 void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
 void dwmac4_set_tx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
 
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -37,96 +37,96 @@ int dwmac4_dma_reset(void __iomem *ioadd
 
 void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan)
 {
-       writel(tail_ptr, ioaddr + DMA_CHAN_RX_END_ADDR(0));
+       writel(tail_ptr, ioaddr + DMA_CHAN_RX_END_ADDR(chan));
 }
 
 void dwmac4_set_tx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan)
 {
-       writel(tail_ptr, ioaddr + DMA_CHAN_TX_END_ADDR(0));
+       writel(tail_ptr, ioaddr + DMA_CHAN_TX_END_ADDR(chan));
 }
 
-void dwmac4_dma_start_tx(void __iomem *ioaddr)
+void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan)
 {
-       u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(STMMAC_CHAN0));
+       u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
        value |= DMA_CONTROL_ST;
-       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(STMMAC_CHAN0));
+       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
        value = readl(ioaddr + GMAC_CONFIG);
        value |= GMAC_CONFIG_TE;
        writel(value, ioaddr + GMAC_CONFIG);
 }
 
-void dwmac4_dma_stop_tx(void __iomem *ioaddr)
+void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan)
 {
-       u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(STMMAC_CHAN0));
+       u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
        value &= ~DMA_CONTROL_ST;
-       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(STMMAC_CHAN0));
+       writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
 
        value = readl(ioaddr + GMAC_CONFIG);
        value &= ~GMAC_CONFIG_TE;
        writel(value, ioaddr + GMAC_CONFIG);
 }
 
-void dwmac4_dma_start_rx(void __iomem *ioaddr)
+void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan)
 {
-       u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(STMMAC_CHAN0));
+       u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
        value |= DMA_CONTROL_SR;
 
-       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(STMMAC_CHAN0));
+       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
        value = readl(ioaddr + GMAC_CONFIG);
        value |= GMAC_CONFIG_RE;
        writel(value, ioaddr + GMAC_CONFIG);
 }
 
-void dwmac4_dma_stop_rx(void __iomem *ioaddr)
+void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 {
-       u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(STMMAC_CHAN0));
+       u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
        value &= ~DMA_CONTROL_SR;
-       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(STMMAC_CHAN0));
+       writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
 
        value = readl(ioaddr + GMAC_CONFIG);
        value &= ~GMAC_CONFIG_RE;
        writel(value, ioaddr + GMAC_CONFIG);
 }
 
-void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len)
+void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
 {
-       writel(len, ioaddr + DMA_CHAN_TX_RING_LEN(STMMAC_CHAN0));
+       writel(len, ioaddr + DMA_CHAN_TX_RING_LEN(chan));
 }
 
-void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len)
+void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
 {
-       writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(STMMAC_CHAN0));
+       writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(chan));
 }
 
-void dwmac4_enable_dma_irq(void __iomem *ioaddr)
+void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
        writel(DMA_CHAN_INTR_DEFAULT_MASK, ioaddr +
-              DMA_CHAN_INTR_ENA(STMMAC_CHAN0));
+              DMA_CHAN_INTR_ENA(chan));
 }
 
-void dwmac410_enable_dma_irq(void __iomem *ioaddr)
+void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
        writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10,
-              ioaddr + DMA_CHAN_INTR_ENA(STMMAC_CHAN0));
+              ioaddr + DMA_CHAN_INTR_ENA(chan));
 }
 
-void dwmac4_disable_dma_irq(void __iomem *ioaddr)
+void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
-       writel(0, ioaddr + DMA_CHAN_INTR_ENA(STMMAC_CHAN0));
+       writel(0, ioaddr + DMA_CHAN_INTR_ENA(chan));
 }
 
 int dwmac4_dma_interrupt(void __iomem *ioaddr,
-                        struct stmmac_extra_stats *x)
+                        struct stmmac_extra_stats *x, u32 chan)
 {
        int ret = 0;
 
-       u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(0));
+       u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan));
 
        /* ABNORMAL interrupts */
        if (unlikely(intr_status & DMA_CHAN_STATUS_AIS)) {
@@ -153,7 +153,7 @@ int dwmac4_dma_interrupt(void __iomem *i
                if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
                        u32 value;
 
-                       value = readl(ioaddr + DMA_CHAN_INTR_ENA(STMMAC_CHAN0));
+                       value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
                        /* to schedule NAPI on real RIE event. */
                        if (likely(value & DMA_CHAN_INTR_ENA_RIE)) {
                                x->rx_normal_irq_n++;
@@ -172,7 +172,7 @@ int dwmac4_dma_interrupt(void __iomem *i
         * status [21-0] expect reserved bits [5-3]
         */
        writel((intr_status & 0x3fffc7),
-              ioaddr + DMA_CHAN_STATUS(STMMAC_CHAN0));
+              ioaddr + DMA_CHAN_STATUS(chan));
 
        return ret;
 }
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -137,13 +137,14 @@
 #define DMA_CONTROL_FTF                0x00100000      /* Flush transmit FIFO */
 
 void dwmac_enable_dma_transmission(void __iomem *ioaddr);
-void dwmac_enable_dma_irq(void __iomem *ioaddr);
-void dwmac_disable_dma_irq(void __iomem *ioaddr);
-void dwmac_dma_start_tx(void __iomem *ioaddr);
-void dwmac_dma_stop_tx(void __iomem *ioaddr);
-void dwmac_dma_start_rx(void __iomem *ioaddr);
-void dwmac_dma_stop_rx(void __iomem *ioaddr);
-int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x);
+void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan);
+void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan);
+void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan);
+void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan);
+int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x,
+                       u32 chan);
 int dwmac_dma_reset(void __iomem *ioaddr);
 
 #endif /* __DWMAC_DMA_H__ */
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -47,38 +47,38 @@ void dwmac_enable_dma_transmission(void
        writel(1, ioaddr + DMA_XMT_POLL_DEMAND);
 }
 
-void dwmac_enable_dma_irq(void __iomem *ioaddr)
+void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
        writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA);
 }
 
-void dwmac_disable_dma_irq(void __iomem *ioaddr)
+void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan)
 {
        writel(0, ioaddr + DMA_INTR_ENA);
 }
 
-void dwmac_dma_start_tx(void __iomem *ioaddr)
+void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan)
 {
        u32 value = readl(ioaddr + DMA_CONTROL);
        value |= DMA_CONTROL_ST;
        writel(value, ioaddr + DMA_CONTROL);
 }
 
-void dwmac_dma_stop_tx(void __iomem *ioaddr)
+void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan)
 {
        u32 value = readl(ioaddr + DMA_CONTROL);
        value &= ~DMA_CONTROL_ST;
        writel(value, ioaddr + DMA_CONTROL);
 }
 
-void dwmac_dma_start_rx(void __iomem *ioaddr)
+void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan)
 {
        u32 value = readl(ioaddr + DMA_CONTROL);
        value |= DMA_CONTROL_SR;
        writel(value, ioaddr + DMA_CONTROL);
 }
 
-void dwmac_dma_stop_rx(void __iomem *ioaddr)
+void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 {
        u32 value = readl(ioaddr + DMA_CONTROL);
        value &= ~DMA_CONTROL_SR;
@@ -156,7 +156,7 @@ static void show_rx_process_state(unsign
 #endif
 
 int dwmac_dma_interrupt(void __iomem *ioaddr,
-                       struct stmmac_extra_stats *x)
+                       struct stmmac_extra_stats *x, u32 chan)
 {
        int ret = 0;
        /* read the status register (CSR5) */
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -315,7 +315,7 @@ static void enh_desc_release_tx_desc(str
 
 static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
                                     bool csum_flag, int mode, bool tx_own,
-                                    bool ls)
+                                    bool ls, unsigned int tot_pkt_len)
 {
        unsigned int tdes0 = le32_to_cpu(p->des0);
 
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -191,7 +191,7 @@ static void ndesc_release_tx_desc(struct
 
 static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
                                  bool csum_flag, int mode, bool tx_own,
-                                 bool ls)
+                                 bool ls, unsigned int tot_pkt_len)
 {
        unsigned int tdes1 = le32_to_cpu(p->des1);
 
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -26,16 +26,17 @@
 
 static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 {
-       struct stmmac_priv *priv = (struct stmmac_priv *)p;
-       unsigned int entry = priv->cur_tx;
-       struct dma_desc *desc;
+       struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)p;
        unsigned int nopaged_len = skb_headlen(skb);
+       struct stmmac_priv *priv = tx_q->priv_data;
+       unsigned int entry = tx_q->cur_tx;
        unsigned int bmax, len, des2;
+       struct dma_desc *desc;
 
        if (priv->extend_desc)
-               desc = (struct dma_desc *)(priv->dma_etx + entry);
+               desc = (struct dma_desc *)(tx_q->dma_etx + entry);
        else
-               desc = priv->dma_tx + entry;
+               desc = tx_q->dma_tx + entry;
 
        if (priv->plat->enh_desc)
                bmax = BUF_SIZE_8KiB;
@@ -52,48 +53,51 @@ static int stmmac_jumbo_frm(void *p, str
                if (dma_mapping_error(priv->device, des2))
                        return -1;
 
-               priv->tx_skbuff_dma[entry].buf = des2;
-               priv->tx_skbuff_dma[entry].len = bmax;
-               priv->tx_skbuff_dma[entry].is_jumbo = true;
+               tx_q->tx_skbuff_dma[entry].buf = des2;
+               tx_q->tx_skbuff_dma[entry].len = bmax;
+               tx_q->tx_skbuff_dma[entry].is_jumbo = true;
 
                desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
                priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum,
-                                               STMMAC_RING_MODE, 0, false);
-               priv->tx_skbuff[entry] = NULL;
+                                               STMMAC_RING_MODE, 0,
+                                               false, skb->len);
+               tx_q->tx_skbuff[entry] = NULL;
                entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
 
                if (priv->extend_desc)
-                       desc = (struct dma_desc *)(priv->dma_etx + entry);
+                       desc = (struct dma_desc *)(tx_q->dma_etx + entry);
                else
-                       desc = priv->dma_tx + entry;
+                       desc = tx_q->dma_tx + entry;
 
                des2 = dma_map_single(priv->device, skb->data + bmax, len,
                                      DMA_TO_DEVICE);
                desc->des2 = cpu_to_le32(des2);
                if (dma_mapping_error(priv->device, des2))
                        return -1;
-               priv->tx_skbuff_dma[entry].buf = des2;
-               priv->tx_skbuff_dma[entry].len = len;
-               priv->tx_skbuff_dma[entry].is_jumbo = true;
+               tx_q->tx_skbuff_dma[entry].buf = des2;
+               tx_q->tx_skbuff_dma[entry].len = len;
+               tx_q->tx_skbuff_dma[entry].is_jumbo = true;
 
                desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
                priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
-                                               STMMAC_RING_MODE, 1, true);
+                                               STMMAC_RING_MODE, 1,
+                                               true, skb->len);
        } else {
                des2 = dma_map_single(priv->device, skb->data,
                                      nopaged_len, DMA_TO_DEVICE);
                desc->des2 = cpu_to_le32(des2);
                if (dma_mapping_error(priv->device, des2))
                        return -1;
-               priv->tx_skbuff_dma[entry].buf = des2;
-               priv->tx_skbuff_dma[entry].len = nopaged_len;
-               priv->tx_skbuff_dma[entry].is_jumbo = true;
+               tx_q->tx_skbuff_dma[entry].buf = des2;
+               tx_q->tx_skbuff_dma[entry].len = nopaged_len;
+               tx_q->tx_skbuff_dma[entry].is_jumbo = true;
                desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
                priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum,
-                                               STMMAC_RING_MODE, 0, true);
+                                               STMMAC_RING_MODE, 0,
+                                               true, skb->len);
        }
 
-       priv->cur_tx = entry;
+       tx_q->cur_tx = entry;
 
        return entry;
 }
@@ -125,12 +129,13 @@ static void stmmac_init_desc3(struct dma
 
 static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
 {
-       struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr;
-       unsigned int entry = priv->dirty_tx;
+       struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)priv_ptr;
+       struct stmmac_priv *priv = tx_q->priv_data;
+       unsigned int entry = tx_q->dirty_tx;
 
        /* des3 is only used for jumbo frames tx or time stamping */
-       if (unlikely(priv->tx_skbuff_dma[entry].is_jumbo ||
-                    (priv->tx_skbuff_dma[entry].last_segment &&
+       if (unlikely(tx_q->tx_skbuff_dma[entry].is_jumbo ||
+                    (tx_q->tx_skbuff_dma[entry].last_segment &&
                      !priv->extend_desc && priv->hwts_tx_en)))
                p->des3 = 0;
 }
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -46,38 +46,51 @@ struct stmmac_tx_info {
        bool is_jumbo;
 };
 
-struct stmmac_priv {
-       /* Frequently used values are kept adjacent for cache effect */
+/* Frequently used values are kept adjacent for cache effect */
+struct stmmac_tx_queue {
+       u32 queue_index;
+       struct stmmac_priv *priv_data;
        struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
        struct dma_desc *dma_tx;
        struct sk_buff **tx_skbuff;
+       struct stmmac_tx_info *tx_skbuff_dma;
        unsigned int cur_tx;
        unsigned int dirty_tx;
+       dma_addr_t dma_tx_phy;
+       u32 tx_tail_addr;
+};
+
+struct stmmac_rx_queue {
+       u32 queue_index;
+       struct stmmac_priv *priv_data;
+       struct dma_extended_desc *dma_erx;
+       struct dma_desc *dma_rx ____cacheline_aligned_in_smp;
+       struct sk_buff **rx_skbuff;
+       dma_addr_t *rx_skbuff_dma;
+       unsigned int cur_rx;
+       unsigned int dirty_rx;
+       u32 rx_zeroc_thresh;
+       dma_addr_t dma_rx_phy;
+       u32 rx_tail_addr;
+       struct napi_struct napi ____cacheline_aligned_in_smp;
+};
+
+struct stmmac_priv {
+       /* Frequently used values are kept adjacent for cache effect */
        u32 tx_count_frames;
        u32 tx_coal_frames;
        u32 tx_coal_timer;
-       struct stmmac_tx_info *tx_skbuff_dma;
-       dma_addr_t dma_tx_phy;
+
        int tx_coalesce;
        int hwts_tx_en;
        bool tx_path_in_lpi_mode;
        struct timer_list txtimer;
        bool tso;
 
-       struct dma_desc *dma_rx ____cacheline_aligned_in_smp;
-       struct dma_extended_desc *dma_erx;
-       struct sk_buff **rx_skbuff;
-       unsigned int cur_rx;
-       unsigned int dirty_rx;
        unsigned int dma_buf_sz;
        unsigned int rx_copybreak;
-       unsigned int rx_zeroc_thresh;
        u32 rx_riwt;
        int hwts_rx_en;
-       dma_addr_t *rx_skbuff_dma;
-       dma_addr_t dma_rx_phy;
-
-       struct napi_struct napi ____cacheline_aligned_in_smp;
 
        void __iomem *ioaddr;
        struct net_device *dev;
@@ -85,6 +98,12 @@ struct stmmac_priv {
        struct mac_device_info *hw;
        spinlock_t lock;
 
+       /* RX Queue */
+       struct stmmac_rx_queue rx_queue[MTL_MAX_RX_QUEUES];
+
+       /* TX Queue */
+       struct stmmac_tx_queue tx_queue[MTL_MAX_TX_QUEUES];
+
        int oldlink;
        int speed;
        int oldduplex;
@@ -119,8 +138,6 @@ struct stmmac_priv {
        spinlock_t ptp_lock;
        void __iomem *mmcaddr;
        void __iomem *ptpaddr;
-       u32 rx_tail_addr;
-       u32 tx_tail_addr;
        u32 mss;
 
 #ifdef CONFIG_DEBUG_FS
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -481,6 +481,7 @@ stmmac_set_pauseparam(struct net_device
                      struct ethtool_pauseparam *pause)
 {
        struct stmmac_priv *priv = netdev_priv(netdev);
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
        struct phy_device *phy = netdev->phydev;
        int new_pause = FLOW_OFF;
 
@@ -511,7 +512,7 @@ stmmac_set_pauseparam(struct net_device
        }
 
        priv->hw->mac->flow_ctrl(priv->hw, phy->duplex, priv->flow_ctrl,
-                                priv->pause);
+                                priv->pause, tx_cnt);
        return 0;
 }
 
@@ -519,6 +520,8 @@ static void stmmac_get_ethtool_stats(str
                                 struct ethtool_stats *dummy, u64 *data)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
        int i, j = 0;
 
        /* Update the DMA HW counters for dwmac10/100 */
@@ -549,7 +552,8 @@ static void stmmac_get_ethtool_stats(str
                if ((priv->hw->mac->debug) &&
                    (priv->synopsys_id >= DWMAC_CORE_3_50))
                        priv->hw->mac->debug(priv->ioaddr,
-                                            (void *)&priv->xstats);
+                                            (void *)&priv->xstats,
+                                            rx_queues_count, tx_queues_count);
        }
        for (i = 0; i < STMMAC_STATS_LEN; i++) {
                char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset;
@@ -726,6 +730,7 @@ static int stmmac_set_coalesce(struct ne
                               struct ethtool_coalesce *ec)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
        unsigned int rx_riwt;
 
        /* Check not supported parameters  */
@@ -764,7 +769,7 @@ static int stmmac_set_coalesce(struct ne
        priv->tx_coal_frames = ec->tx_max_coalesced_frames;
        priv->tx_coal_timer = ec->tx_coalesce_usecs;
        priv->rx_riwt = rx_riwt;
-       priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt);
+       priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt, rx_cnt);
 
        return 0;
 }
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -139,6 +139,64 @@ static void stmmac_verify_args(void)
 }
 
 /**
+ * stmmac_disable_all_queues - Disable all queues
+ * @priv: driver private structure
+ */
+static void stmmac_disable_all_queues(struct stmmac_priv *priv)
+{
+       u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < rx_queues_cnt; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               napi_disable(&rx_q->napi);
+       }
+}
+
+/**
+ * stmmac_enable_all_queues - Enable all queues
+ * @priv: driver private structure
+ */
+static void stmmac_enable_all_queues(struct stmmac_priv *priv)
+{
+       u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < rx_queues_cnt; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               napi_enable(&rx_q->napi);
+       }
+}
+
+/**
+ * stmmac_stop_all_queues - Stop all queues
+ * @priv: driver private structure
+ */
+static void stmmac_stop_all_queues(struct stmmac_priv *priv)
+{
+       u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < tx_queues_cnt; queue++)
+               netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
+}
+
+/**
+ * stmmac_start_all_queues - Start all queues
+ * @priv: driver private structure
+ */
+static void stmmac_start_all_queues(struct stmmac_priv *priv)
+{
+       u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < tx_queues_cnt; queue++)
+               netif_tx_start_queue(netdev_get_tx_queue(priv->dev, queue));
+}
+
+/**
  * stmmac_clk_csr_set - dynamically set the MDC clock
  * @priv: driver private structure
  * Description: this is to dynamically set the MDC clock according to the csr
@@ -185,26 +243,33 @@ static void print_pkt(unsigned char *buf
        print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, buf, len);
 }
 
-static inline u32 stmmac_tx_avail(struct stmmac_priv *priv)
+static inline u32 stmmac_tx_avail(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        u32 avail;
 
-       if (priv->dirty_tx > priv->cur_tx)
-               avail = priv->dirty_tx - priv->cur_tx - 1;
+       if (tx_q->dirty_tx > tx_q->cur_tx)
+               avail = tx_q->dirty_tx - tx_q->cur_tx - 1;
        else
-               avail = DMA_TX_SIZE - priv->cur_tx + priv->dirty_tx - 1;
+               avail = DMA_TX_SIZE - tx_q->cur_tx + tx_q->dirty_tx - 1;
 
        return avail;
 }
 
-static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv)
+/**
+ * stmmac_rx_dirty - Get the number of dirty entries in an RX queue
+ * @priv: driver private structure
+ * @queue: RX queue index
+ */
+static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
        u32 dirty;
 
-       if (priv->dirty_rx <= priv->cur_rx)
-               dirty = priv->cur_rx - priv->dirty_rx;
+       if (rx_q->dirty_rx <= rx_q->cur_rx)
+               dirty = rx_q->cur_rx - rx_q->dirty_rx;
        else
-               dirty = DMA_RX_SIZE - priv->dirty_rx + priv->cur_rx;
+               dirty = DMA_RX_SIZE - rx_q->dirty_rx + rx_q->cur_rx;
 
        return dirty;
 }
@@ -232,9 +297,19 @@ static inline void stmmac_hw_fix_mac_spe
  */
 static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
 {
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       /* check if all TX queues have the work finished */
+       for (queue = 0; queue < tx_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               if (tx_q->dirty_tx != tx_q->cur_tx)
+                       return; /* still unfinished work */
+       }
+
        /* Check and enter in LPI mode */
-       if ((priv->dirty_tx == priv->cur_tx) &&
-           (priv->tx_path_in_lpi_mode == false))
+       if (!priv->tx_path_in_lpi_mode)
                priv->hw->mac->set_eee_mode(priv->hw,
                                            priv->plat->en_tx_lpi_clockgating);
 }
@@ -365,14 +440,14 @@ static void stmmac_get_tx_hwtstamp(struc
                return;
 
        /* check tx tstamp status */
-       if (!priv->hw->desc->get_tx_timestamp_status(p)) {
+       if (priv->hw->desc->get_tx_timestamp_status(p)) {
                /* get the valid tstamp */
                ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
                memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
                shhwtstamp.hwtstamp = ns_to_ktime(ns);
 
-               netdev_info(priv->dev, "get valid TX hw timestamp %llu\n", ns);
+               netdev_dbg(priv->dev, "get valid TX hw timestamp %llu\n", ns);
                /* pass tstamp to stack */
                skb_tstamp_tx(skb, &shhwtstamp);
        }
@@ -399,19 +474,19 @@ static void stmmac_get_rx_hwtstamp(struc
                return;
 
        /* Check if timestamp is available */
-       if (!priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
+       if (priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
                /* For GMAC4, the valid timestamp is from CTX next desc. */
                if (priv->plat->has_gmac4)
                        ns = priv->hw->desc->get_timestamp(np, priv->adv_ts);
                else
                        ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
-               netdev_info(priv->dev, "get valid RX hw timestamp %llu\n", ns);
+               netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
                shhwtstamp = skb_hwtstamps(skb);
                memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
                shhwtstamp->hwtstamp = ns_to_ktime(ns);
        } else  {
-               netdev_err(priv->dev, "cannot get RX hw timestamp\n");
+               netdev_dbg(priv->dev, "cannot get RX hw timestamp\n");
        }
 }
 
@@ -688,6 +763,19 @@ static void stmmac_release_ptp(struct st
 }
 
 /**
+ *  stmmac_mac_flow_ctrl - Configure flow control in all queues
+ *  @priv: driver private structure
+ *  Description: It is used for configuring the flow control in all queues
+ */
+static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+
+       priv->hw->mac->flow_ctrl(priv->hw, duplex, priv->flow_ctrl,
+                                priv->pause, tx_cnt);
+}
+
+/**
  * stmmac_adjust_link - adjusts the link parameters
  * @dev: net device structure
  * Description: this is the helper called by the physical abstraction layer
@@ -702,7 +790,6 @@ static void stmmac_adjust_link(struct ne
        struct phy_device *phydev = dev->phydev;
        unsigned long flags;
        int new_state = 0;
-       unsigned int fc = priv->flow_ctrl, pause_time = priv->pause;
 
        if (!phydev)
                return;
@@ -724,8 +811,7 @@ static void stmmac_adjust_link(struct ne
                }
                /* Flow Control operation */
                if (phydev->pause)
-                       priv->hw->mac->flow_ctrl(priv->hw, phydev->duplex,
-                                                fc, pause_time);
+                       stmmac_mac_flow_ctrl(priv, phydev->duplex);
 
                if (phydev->speed != priv->speed) {
                        new_state = 1;
@@ -893,22 +979,56 @@ static int stmmac_init_phy(struct net_de
        return 0;
 }
 
-static void stmmac_display_rings(struct stmmac_priv *priv)
+static void stmmac_display_rx_rings(struct stmmac_priv *priv)
 {
-       void *head_rx, *head_tx;
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       void *head_rx;
+       u32 queue;
 
-       if (priv->extend_desc) {
-               head_rx = (void *)priv->dma_erx;
-               head_tx = (void *)priv->dma_etx;
-       } else {
-               head_rx = (void *)priv->dma_rx;
-               head_tx = (void *)priv->dma_tx;
+       /* Display RX rings */
+       for (queue = 0; queue < rx_cnt; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               pr_info("\tRX Queue %u rings\n", queue);
+
+               if (priv->extend_desc)
+                       head_rx = (void *)rx_q->dma_erx;
+               else
+                       head_rx = (void *)rx_q->dma_rx;
+
+               /* Display RX ring */
+               priv->hw->desc->display_ring(head_rx, DMA_RX_SIZE, true);
+       }
+}
+
+static void stmmac_display_tx_rings(struct stmmac_priv *priv)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       void *head_tx;
+       u32 queue;
+
+       /* Display TX rings */
+       for (queue = 0; queue < tx_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               pr_info("\tTX Queue %d rings\n", queue);
+
+               if (priv->extend_desc)
+                       head_tx = (void *)tx_q->dma_etx;
+               else
+                       head_tx = (void *)tx_q->dma_tx;
+
+               priv->hw->desc->display_ring(head_tx, DMA_TX_SIZE, false);
        }
+}
+
+static void stmmac_display_rings(struct stmmac_priv *priv)
+{
+       /* Display RX ring */
+       stmmac_display_rx_rings(priv);
 
-       /* Display Rx ring */
-       priv->hw->desc->display_ring(head_rx, DMA_RX_SIZE, true);
-       /* Display Tx ring */
-       priv->hw->desc->display_ring(head_tx, DMA_TX_SIZE, false);
+       /* Display TX ring */
+       stmmac_display_tx_rings(priv);
 }
 
 static int stmmac_set_bfsize(int mtu, int bufsize)
@@ -928,48 +1048,88 @@ static int stmmac_set_bfsize(int mtu, in
 }
 
 /**
- * stmmac_clear_descriptors - clear descriptors
+ * stmmac_clear_rx_descriptors - clear RX descriptors
  * @priv: driver private structure
- * Description: this function is called to clear the tx and rx descriptors
+ * @queue: RX queue index
+ * Description: this function is called to clear the RX descriptors
  * in case of both basic and extended descriptors are used.
  */
-static void stmmac_clear_descriptors(struct stmmac_priv *priv)
+static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
        int i;
 
-       /* Clear the Rx/Tx descriptors */
+       /* Clear the RX descriptors */
        for (i = 0; i < DMA_RX_SIZE; i++)
                if (priv->extend_desc)
-                       priv->hw->desc->init_rx_desc(&priv->dma_erx[i].basic,
+                       priv->hw->desc->init_rx_desc(&rx_q->dma_erx[i].basic,
                                                     priv->use_riwt, priv->mode,
                                                     (i == DMA_RX_SIZE - 1));
                else
-                       priv->hw->desc->init_rx_desc(&priv->dma_rx[i],
+                       priv->hw->desc->init_rx_desc(&rx_q->dma_rx[i],
                                                     priv->use_riwt, priv->mode,
                                                     (i == DMA_RX_SIZE - 1));
+}
+
+/**
+ * stmmac_clear_tx_descriptors - clear tx descriptors
+ * @priv: driver private structure
+ * @queue: TX queue index.
+ * Description: this function is called to clear the TX descriptors
+ * in case of both basic and extended descriptors are used.
+ */
+static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, u32 queue)
+{
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+       int i;
+
+       /* Clear the TX descriptors */
        for (i = 0; i < DMA_TX_SIZE; i++)
                if (priv->extend_desc)
-                       priv->hw->desc->init_tx_desc(&priv->dma_etx[i].basic,
+                       priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic,
                                                     priv->mode,
                                                     (i == DMA_TX_SIZE - 1));
                else
-                       priv->hw->desc->init_tx_desc(&priv->dma_tx[i],
+                       priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i],
                                                     priv->mode,
                                                     (i == DMA_TX_SIZE - 1));
 }
 
 /**
+ * stmmac_clear_descriptors - clear descriptors
+ * @priv: driver private structure
+ * Description: this function is called to clear the TX and RX descriptors
+ * in case of both basic and extended descriptors are used.
+ */
+static void stmmac_clear_descriptors(struct stmmac_priv *priv)
+{
+       u32 rx_queue_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_queue_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       /* Clear the RX descriptors */
+       for (queue = 0; queue < rx_queue_cnt; queue++)
+               stmmac_clear_rx_descriptors(priv, queue);
+
+       /* Clear the TX descriptors */
+       for (queue = 0; queue < tx_queue_cnt; queue++)
+               stmmac_clear_tx_descriptors(priv, queue);
+}
+
+/**
  * stmmac_init_rx_buffers - init the RX descriptor buffer.
  * @priv: driver private structure
  * @p: descriptor pointer
  * @i: descriptor index
- * @flags: gfp flag.
+ * @flags: gfp flag
+ * @queue: RX queue index
  * Description: this function is called to allocate a receive buffer, perform
  * the DMA mapping and init the descriptor.
  */
 static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
-                                 int i, gfp_t flags)
+                                 int i, gfp_t flags, u32 queue)
 {
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
        struct sk_buff *skb;
 
        skb = __netdev_alloc_skb_ip_align(priv->dev, priv->dma_buf_sz, flags);
@@ -978,20 +1138,20 @@ static int stmmac_init_rx_buffers(struct
                           "%s: Rx init fails; skb is NULL\n", __func__);
                return -ENOMEM;
        }
-       priv->rx_skbuff[i] = skb;
-       priv->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data,
+       rx_q->rx_skbuff[i] = skb;
+       rx_q->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data,
                                                priv->dma_buf_sz,
                                                DMA_FROM_DEVICE);
-       if (dma_mapping_error(priv->device, priv->rx_skbuff_dma[i])) {
+       if (dma_mapping_error(priv->device, rx_q->rx_skbuff_dma[i])) {
                netdev_err(priv->dev, "%s: DMA mapping error\n", __func__);
                dev_kfree_skb_any(skb);
                return -EINVAL;
        }
 
        if (priv->synopsys_id >= DWMAC_CORE_4_00)
-               p->des0 = cpu_to_le32(priv->rx_skbuff_dma[i]);
+               p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[i]);
        else
-               p->des2 = cpu_to_le32(priv->rx_skbuff_dma[i]);
+               p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[i]);
 
        if ((priv->hw->mode->init_desc3) &&
            (priv->dma_buf_sz == BUF_SIZE_16KiB))
@@ -1000,30 +1160,71 @@ static int stmmac_init_rx_buffers(struct
        return 0;
 }
 
-static void stmmac_free_rx_buffers(struct stmmac_priv *priv, int i)
+/**
+ * stmmac_free_rx_buffer - free RX dma buffers
+ * @priv: private structure
+ * @queue: RX queue index
+ * @i: buffer index.
+ */
+static void stmmac_free_rx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 {
-       if (priv->rx_skbuff[i]) {
-               dma_unmap_single(priv->device, priv->rx_skbuff_dma[i],
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+       if (rx_q->rx_skbuff[i]) {
+               dma_unmap_single(priv->device, rx_q->rx_skbuff_dma[i],
                                 priv->dma_buf_sz, DMA_FROM_DEVICE);
-               dev_kfree_skb_any(priv->rx_skbuff[i]);
+               dev_kfree_skb_any(rx_q->rx_skbuff[i]);
        }
-       priv->rx_skbuff[i] = NULL;
+       rx_q->rx_skbuff[i] = NULL;
 }
 
 /**
- * init_dma_desc_rings - init the RX/TX descriptor rings
+ * stmmac_free_tx_buffer - free TX dma buffers
+ * @priv: private structure
+ * @queue: TX queue index
+ * @i: buffer index.
+ */
+static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
+{
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+       if (tx_q->tx_skbuff_dma[i].buf) {
+               if (tx_q->tx_skbuff_dma[i].map_as_page)
+                       dma_unmap_page(priv->device,
+                                      tx_q->tx_skbuff_dma[i].buf,
+                                      tx_q->tx_skbuff_dma[i].len,
+                                      DMA_TO_DEVICE);
+               else
+                       dma_unmap_single(priv->device,
+                                        tx_q->tx_skbuff_dma[i].buf,
+                                        tx_q->tx_skbuff_dma[i].len,
+                                        DMA_TO_DEVICE);
+       }
+
+       if (tx_q->tx_skbuff[i]) {
+               dev_kfree_skb_any(tx_q->tx_skbuff[i]);
+               tx_q->tx_skbuff[i] = NULL;
+               tx_q->tx_skbuff_dma[i].buf = 0;
+               tx_q->tx_skbuff_dma[i].map_as_page = false;
+       }
+}
+
+/**
+ * init_dma_rx_desc_rings - init the RX descriptor rings
  * @dev: net device structure
  * @flags: gfp flag.
- * Description: this function initializes the DMA RX/TX descriptors
+ * Description: this function initializes the DMA RX descriptors
  * and allocates the socket buffers. It supports the chained and ring
  * modes.
  */
-static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
+static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 {
-       int i;
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_count = priv->plat->rx_queues_to_use;
        unsigned int bfsize = 0;
        int ret = -ENOMEM;
+       int queue;
+       int i;
 
        if (priv->hw->mode->set_16kib_bfsize)
                bfsize = priv->hw->mode->set_16kib_bfsize(dev->mtu);
@@ -1033,235 +1234,409 @@ static int init_dma_desc_rings(struct ne
 
        priv->dma_buf_sz = bfsize;
 
-       netif_dbg(priv, probe, priv->dev,
-                 "(%s) dma_rx_phy=0x%08x dma_tx_phy=0x%08x\n",
-                 __func__, (u32)priv->dma_rx_phy, (u32)priv->dma_tx_phy);
-
        /* RX INITIALIZATION */
        netif_dbg(priv, probe, priv->dev,
                  "SKB addresses:\nskb\t\tskb data\tdma data\n");
 
-       for (i = 0; i < DMA_RX_SIZE; i++) {
-               struct dma_desc *p;
-               if (priv->extend_desc)
-                       p = &((priv->dma_erx + i)->basic);
-               else
-                       p = priv->dma_rx + i;
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 
-               ret = stmmac_init_rx_buffers(priv, p, i, flags);
-               if (ret)
-                       goto err_init_rx_buffers;
+               netif_dbg(priv, probe, priv->dev,
+                         "(%s) dma_rx_phy=0x%08x\n", __func__,
+                         (u32)rx_q->dma_rx_phy);
 
-               netif_dbg(priv, probe, priv->dev, "[%p]\t[%p]\t[%x]\n",
-                         priv->rx_skbuff[i], priv->rx_skbuff[i]->data,
-                         (unsigned int)priv->rx_skbuff_dma[i]);
+               for (i = 0; i < DMA_RX_SIZE; i++) {
+                       struct dma_desc *p;
+
+                       if (priv->extend_desc)
+                               p = &((rx_q->dma_erx + i)->basic);
+                       else
+                               p = rx_q->dma_rx + i;
+
+                       ret = stmmac_init_rx_buffers(priv, p, i, flags,
+                                                    queue);
+                       if (ret)
+                               goto err_init_rx_buffers;
+
+                       netif_dbg(priv, probe, priv->dev, "[%p]\t[%p]\t[%x]\n",
+                                 rx_q->rx_skbuff[i], rx_q->rx_skbuff[i]->data,
+                                 (unsigned int)rx_q->rx_skbuff_dma[i]);
+               }
+
+               rx_q->cur_rx = 0;
+               rx_q->dirty_rx = (unsigned int)(i - DMA_RX_SIZE);
+
+               stmmac_clear_rx_descriptors(priv, queue);
+
+               /* Setup the chained descriptor addresses */
+               if (priv->mode == STMMAC_CHAIN_MODE) {
+                       if (priv->extend_desc)
+                               priv->hw->mode->init(rx_q->dma_erx,
+                                                    rx_q->dma_rx_phy,
+                                                    DMA_RX_SIZE, 1);
+                       else
+                               priv->hw->mode->init(rx_q->dma_rx,
+                                                    rx_q->dma_rx_phy,
+                                                    DMA_RX_SIZE, 0);
+               }
        }
-       priv->cur_rx = 0;
-       priv->dirty_rx = (unsigned int)(i - DMA_RX_SIZE);
+
        buf_sz = bfsize;
 
-       /* Setup the chained descriptor addresses */
-       if (priv->mode == STMMAC_CHAIN_MODE) {
-               if (priv->extend_desc) {
-                       priv->hw->mode->init(priv->dma_erx, priv->dma_rx_phy,
-                                            DMA_RX_SIZE, 1);
-                       priv->hw->mode->init(priv->dma_etx, priv->dma_tx_phy,
-                                            DMA_TX_SIZE, 1);
-               } else {
-                       priv->hw->mode->init(priv->dma_rx, priv->dma_rx_phy,
-                                            DMA_RX_SIZE, 0);
-                       priv->hw->mode->init(priv->dma_tx, priv->dma_tx_phy,
-                                            DMA_TX_SIZE, 0);
-               }
+       return 0;
+
+err_init_rx_buffers:
+       while (queue >= 0) {
+               while (--i >= 0)
+                       stmmac_free_rx_buffer(priv, queue, i);
+
+               if (queue == 0)
+                       break;
+
+               i = DMA_RX_SIZE;
+               queue--;
        }
 
-       /* TX INITIALIZATION */
-       for (i = 0; i < DMA_TX_SIZE; i++) {
-               struct dma_desc *p;
-               if (priv->extend_desc)
-                       p = &((priv->dma_etx + i)->basic);
-               else
-                       p = priv->dma_tx + i;
+       return ret;
+}
 
-               if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-                       p->des0 = 0;
-                       p->des1 = 0;
-                       p->des2 = 0;
-                       p->des3 = 0;
-               } else {
-                       p->des2 = 0;
+/**
+ * init_dma_tx_desc_rings - init the TX descriptor rings
+ * @dev: net device structure.
+ * Description: this function initializes the DMA TX descriptors
+ * and resets the TX buffer bookkeeping. It supports the chained and ring
+ * modes.
+ */
+static int init_dma_tx_desc_rings(struct net_device *dev)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       u32 tx_queue_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+       int i;
+
+       for (queue = 0; queue < tx_queue_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               netif_dbg(priv, probe, priv->dev,
+                         "(%s) dma_tx_phy=0x%08x\n", __func__,
+                        (u32)tx_q->dma_tx_phy);
+
+               /* Setup the chained descriptor addresses */
+               if (priv->mode == STMMAC_CHAIN_MODE) {
+                       if (priv->extend_desc)
+                               priv->hw->mode->init(tx_q->dma_etx,
+                                                    tx_q->dma_tx_phy,
+                                                    DMA_TX_SIZE, 1);
+                       else
+                               priv->hw->mode->init(tx_q->dma_tx,
+                                                    tx_q->dma_tx_phy,
+                                                    DMA_TX_SIZE, 0);
+               }
+
+               for (i = 0; i < DMA_TX_SIZE; i++) {
+                       struct dma_desc *p;
+                       if (priv->extend_desc)
+                               p = &((tx_q->dma_etx + i)->basic);
+                       else
+                               p = tx_q->dma_tx + i;
+
+                       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+                               p->des0 = 0;
+                               p->des1 = 0;
+                               p->des2 = 0;
+                               p->des3 = 0;
+                       } else {
+                               p->des2 = 0;
+                       }
+
+                       tx_q->tx_skbuff_dma[i].buf = 0;
+                       tx_q->tx_skbuff_dma[i].map_as_page = false;
+                       tx_q->tx_skbuff_dma[i].len = 0;
+                       tx_q->tx_skbuff_dma[i].last_segment = false;
+                       tx_q->tx_skbuff[i] = NULL;
                }
 
-               priv->tx_skbuff_dma[i].buf = 0;
-               priv->tx_skbuff_dma[i].map_as_page = false;
-               priv->tx_skbuff_dma[i].len = 0;
-               priv->tx_skbuff_dma[i].last_segment = false;
-               priv->tx_skbuff[i] = NULL;
+               tx_q->dirty_tx = 0;
+               tx_q->cur_tx = 0;
+
+               netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 
-       priv->dirty_tx = 0;
-       priv->cur_tx = 0;
-       netdev_reset_queue(priv->dev);
+       return 0;
+}
+
+/**
+ * init_dma_desc_rings - init the RX/TX descriptor rings
+ * @dev: net device structure
+ * @flags: gfp flag.
+ * Description: this function initializes the DMA RX/TX descriptors
+ * and allocates the socket buffers. It supports the chained and ring
+ * modes.
+ */
+static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       int ret;
+
+       ret = init_dma_rx_desc_rings(dev, flags);
+       if (ret)
+               return ret;
+
+       ret = init_dma_tx_desc_rings(dev);
 
        stmmac_clear_descriptors(priv);
 
        if (netif_msg_hw(priv))
                stmmac_display_rings(priv);
 
-       return 0;
-err_init_rx_buffers:
-       while (--i >= 0)
-               stmmac_free_rx_buffers(priv, i);
        return ret;
 }
 
-static void dma_free_rx_skbufs(struct stmmac_priv *priv)
+/**
+ * dma_free_rx_skbufs - free RX dma buffers
+ * @priv: private structure
+ * @queue: RX queue index
+ */
+static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue)
 {
        int i;
 
        for (i = 0; i < DMA_RX_SIZE; i++)
-               stmmac_free_rx_buffers(priv, i);
+               stmmac_free_rx_buffer(priv, queue, i);
 }
 
-static void dma_free_tx_skbufs(struct stmmac_priv *priv)
+/**
+ * dma_free_tx_skbufs - free TX dma buffers
+ * @priv: private structure
+ * @queue: TX queue index
+ */
+static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue)
 {
        int i;
 
-       for (i = 0; i < DMA_TX_SIZE; i++) {
-               if (priv->tx_skbuff_dma[i].buf) {
-                       if (priv->tx_skbuff_dma[i].map_as_page)
-                               dma_unmap_page(priv->device,
-                                              priv->tx_skbuff_dma[i].buf,
-                                              priv->tx_skbuff_dma[i].len,
-                                              DMA_TO_DEVICE);
-                       else
-                               dma_unmap_single(priv->device,
-                                                priv->tx_skbuff_dma[i].buf,
-                                                priv->tx_skbuff_dma[i].len,
-                                                DMA_TO_DEVICE);
-               }
+       for (i = 0; i < DMA_TX_SIZE; i++)
+               stmmac_free_tx_buffer(priv, queue, i);
+}
 
-               if (priv->tx_skbuff[i]) {
-                       dev_kfree_skb_any(priv->tx_skbuff[i]);
-                       priv->tx_skbuff[i] = NULL;
-                       priv->tx_skbuff_dma[i].buf = 0;
-                       priv->tx_skbuff_dma[i].map_as_page = false;
-               }
+/**
+ * free_dma_rx_desc_resources - free RX dma desc resources
+ * @priv: private structure
+ */
+static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 rx_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+
+       /* Free RX queue resources */
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               /* Release the DMA RX socket buffers */
+               dma_free_rx_skbufs(priv, queue);
+
+               /* Free DMA regions of consistent memory previously allocated */
+               if (!priv->extend_desc)
+                       dma_free_coherent(priv->device,
+                                         DMA_RX_SIZE * sizeof(struct dma_desc),
+                                         rx_q->dma_rx, rx_q->dma_rx_phy);
+               else
+                       dma_free_coherent(priv->device, DMA_RX_SIZE *
+                                         sizeof(struct dma_extended_desc),
+                                         rx_q->dma_erx, rx_q->dma_rx_phy);
+
+               kfree(rx_q->rx_skbuff_dma);
+               kfree(rx_q->rx_skbuff);
        }
 }
 
 /**
- * alloc_dma_desc_resources - alloc TX/RX resources.
+ * free_dma_tx_desc_resources - free TX dma desc resources
+ * @priv: private structure
+ */
+static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 queue = 0;
+
+       /* Free TX queue resources */
+       for (queue = 0; queue < tx_count; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               /* Release the DMA TX socket buffers */
+               dma_free_tx_skbufs(priv, queue);
+
+               /* Free DMA regions of consistent memory previously allocated */
+               if (!priv->extend_desc)
+                       dma_free_coherent(priv->device,
+                                         DMA_TX_SIZE * sizeof(struct dma_desc),
+                                         tx_q->dma_tx, tx_q->dma_tx_phy);
+               else
+                       dma_free_coherent(priv->device, DMA_TX_SIZE *
+                                         sizeof(struct dma_extended_desc),
+                                         tx_q->dma_etx, tx_q->dma_tx_phy);
+
+               kfree(tx_q->tx_skbuff_dma);
+               kfree(tx_q->tx_skbuff);
+       }
+}
+
+/**
+ * alloc_dma_rx_desc_resources - alloc RX resources.
  * @priv: private structure
  * Description: according to which descriptor can be used (extend or basic)
  * this function allocates the resources for TX and RX paths. In case of
  * reception, for example, it pre-allocated the RX socket buffer in order to
  * allow zero-copy mechanism.
  */
-static int alloc_dma_desc_resources(struct stmmac_priv *priv)
+static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
 {
+       u32 rx_count = priv->plat->rx_queues_to_use;
        int ret = -ENOMEM;
+       u32 queue;
 
-       priv->rx_skbuff_dma = kmalloc_array(DMA_RX_SIZE, sizeof(dma_addr_t),
-                                           GFP_KERNEL);
-       if (!priv->rx_skbuff_dma)
-               return -ENOMEM;
+       /* RX queues buffers and DMA */
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 
-       priv->rx_skbuff = kmalloc_array(DMA_RX_SIZE, sizeof(struct sk_buff *),
-                                       GFP_KERNEL);
-       if (!priv->rx_skbuff)
-               goto err_rx_skbuff;
-
-       priv->tx_skbuff_dma = kmalloc_array(DMA_TX_SIZE,
-                                           sizeof(*priv->tx_skbuff_dma),
-                                           GFP_KERNEL);
-       if (!priv->tx_skbuff_dma)
-               goto err_tx_skbuff_dma;
-
-       priv->tx_skbuff = kmalloc_array(DMA_TX_SIZE, sizeof(struct sk_buff *),
-                                       GFP_KERNEL);
-       if (!priv->tx_skbuff)
-               goto err_tx_skbuff;
-
-       if (priv->extend_desc) {
-               priv->dma_erx = dma_zalloc_coherent(priv->device, DMA_RX_SIZE *
-                                                   sizeof(struct
-                                                          dma_extended_desc),
-                                                   &priv->dma_rx_phy,
-                                                   GFP_KERNEL);
-               if (!priv->dma_erx)
-                       goto err_dma;
+               rx_q->queue_index = queue;
+               rx_q->priv_data = priv;
 
-               priv->dma_etx = dma_zalloc_coherent(priv->device, DMA_TX_SIZE *
-                                                   sizeof(struct
-                                                          dma_extended_desc),
-                                                   &priv->dma_tx_phy,
+               rx_q->rx_skbuff_dma = kmalloc_array(DMA_RX_SIZE,
+                                                   sizeof(dma_addr_t),
                                                    GFP_KERNEL);
-               if (!priv->dma_etx) {
-                       dma_free_coherent(priv->device, DMA_RX_SIZE *
-                                         sizeof(struct dma_extended_desc),
-                                         priv->dma_erx, priv->dma_rx_phy);
-                       goto err_dma;
-               }
-       } else {
-               priv->dma_rx = dma_zalloc_coherent(priv->device, DMA_RX_SIZE *
-                                                  sizeof(struct dma_desc),
-                                                  &priv->dma_rx_phy,
-                                                  GFP_KERNEL);
-               if (!priv->dma_rx)
-                       goto err_dma;
+               if (!rx_q->rx_skbuff_dma)
+                       return -ENOMEM;
 
-               priv->dma_tx = dma_zalloc_coherent(priv->device, DMA_TX_SIZE *
-                                                  sizeof(struct dma_desc),
-                                                  &priv->dma_tx_phy,
-                                                  GFP_KERNEL);
-               if (!priv->dma_tx) {
-                       dma_free_coherent(priv->device, DMA_RX_SIZE *
-                                         sizeof(struct dma_desc),
-                                         priv->dma_rx, priv->dma_rx_phy);
+               rx_q->rx_skbuff = kmalloc_array(DMA_RX_SIZE,
+                                               sizeof(struct sk_buff *),
+                                               GFP_KERNEL);
+               if (!rx_q->rx_skbuff)
                        goto err_dma;
+
+               if (priv->extend_desc) {
+                       rx_q->dma_erx = dma_zalloc_coherent(priv->device,
+                                                           DMA_RX_SIZE *
+                                                           sizeof(struct
+                                                           dma_extended_desc),
+                                                           &rx_q->dma_rx_phy,
+                                                           GFP_KERNEL);
+                       if (!rx_q->dma_erx)
+                               goto err_dma;
+
+               } else {
+                       rx_q->dma_rx = dma_zalloc_coherent(priv->device,
+                                                          DMA_RX_SIZE *
+                                                          sizeof(struct
+                                                          dma_desc),
+                                                          &rx_q->dma_rx_phy,
+                                                          GFP_KERNEL);
+                       if (!rx_q->dma_rx)
+                               goto err_dma;
                }
        }
 
        return 0;
 
 err_dma:
-       kfree(priv->tx_skbuff);
-err_tx_skbuff:
-       kfree(priv->tx_skbuff_dma);
-err_tx_skbuff_dma:
-       kfree(priv->rx_skbuff);
-err_rx_skbuff:
-       kfree(priv->rx_skbuff_dma);
+       free_dma_rx_desc_resources(priv);
+
+       return ret;
+}
+
+/**
+ * alloc_dma_tx_desc_resources - alloc TX resources.
+ * @priv: private structure
+ * Description: for every TX queue in use this allocates the tx_skbuff_dma and
+ * tx_skbuff arrays and one coherent descriptor ring (extended or basic,
+ * according to priv->extend_desc). Returns 0 on success or -ENOMEM.
+ * NOTE(review): the first failure check returns without the cleanup - confirm.
+ */
+static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
+{
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       int ret = -ENOMEM;
+       u32 queue;
+
+       /* TX queues buffers and DMA */
+       for (queue = 0; queue < tx_count; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               tx_q->queue_index = queue;
+               tx_q->priv_data = priv;
+
+               tx_q->tx_skbuff_dma = kmalloc_array(DMA_TX_SIZE,
+                                                   sizeof(*tx_q->tx_skbuff_dma),
+                                                   GFP_KERNEL);
+               if (!tx_q->tx_skbuff_dma)
+                       return -ENOMEM;
+
+               tx_q->tx_skbuff = kmalloc_array(DMA_TX_SIZE,
+                                               sizeof(struct sk_buff *),
+                                               GFP_KERNEL);
+               if (!tx_q->tx_skbuff)
+                       goto err_dma_buffers;
+
+               if (priv->extend_desc) {
+                       tx_q->dma_etx = dma_zalloc_coherent(priv->device,
+                                                           DMA_TX_SIZE *
+                                                           sizeof(struct
+                                                           dma_extended_desc),
+                                                           &tx_q->dma_tx_phy,
+                                                           GFP_KERNEL);
+                       if (!tx_q->dma_etx)
+                               goto err_dma_buffers;
+               } else {
+                       tx_q->dma_tx = dma_zalloc_coherent(priv->device,
+                                                          DMA_TX_SIZE *
+                                                          sizeof(struct
+                                                                 dma_desc),
+                                                          &tx_q->dma_tx_phy,
+                                                          GFP_KERNEL);
+                       if (!tx_q->dma_tx)
+                               goto err_dma_buffers;
+               }
+       }
+
+       return 0;
+
+err_dma_buffers:
+       free_dma_tx_desc_resources(priv);
+
+       return ret;
+}
+
+/**
+ * alloc_dma_desc_resources - alloc TX/RX resources.
+ * @priv: private structure
+ * Description: allocates the RX resources first and, only if that succeeds,
+ * the TX resources; the error code of whichever step fails is returned.
+ * NOTE(review): nothing here releases the RX resources when the TX step
+ * fails - confirm that the caller takes care of it.
+ */
+static int alloc_dma_desc_resources(struct stmmac_priv *priv)
+{
+       /* RX Allocation */
+       int ret = alloc_dma_rx_desc_resources(priv);
+
+       if (ret)
+               return ret;
+
+       ret = alloc_dma_tx_desc_resources(priv);
+
        return ret;
 }
 
+/**
+ * free_dma_desc_resources - free the RX and TX dma desc resources
+ * @priv: private structure
+ */
 static void free_dma_desc_resources(struct stmmac_priv *priv)
 {
-       /* Release the DMA TX/RX socket buffers */
-       dma_free_rx_skbufs(priv);
-       dma_free_tx_skbufs(priv);
-
-       /* Free DMA regions of consistent memory previously allocated */
-       if (!priv->extend_desc) {
-               dma_free_coherent(priv->device,
-                                 DMA_TX_SIZE * sizeof(struct dma_desc),
-                                 priv->dma_tx, priv->dma_tx_phy);
-               dma_free_coherent(priv->device,
-                                 DMA_RX_SIZE * sizeof(struct dma_desc),
-                                 priv->dma_rx, priv->dma_rx_phy);
-       } else {
-               dma_free_coherent(priv->device, DMA_TX_SIZE *
-                                 sizeof(struct dma_extended_desc),
-                                 priv->dma_etx, priv->dma_tx_phy);
-               dma_free_coherent(priv->device, DMA_RX_SIZE *
-                                 sizeof(struct dma_extended_desc),
-                                 priv->dma_erx, priv->dma_rx_phy);
-       }
-       kfree(priv->rx_skbuff_dma);
-       kfree(priv->rx_skbuff);
-       kfree(priv->tx_skbuff_dma);
-       kfree(priv->tx_skbuff);
+       /* Free the buffers, descriptor rings and arrays of all RX queues */
+       free_dma_rx_desc_resources(priv);
+
+       /* Free the buffers, descriptor rings and arrays of all TX queues */
+       free_dma_tx_desc_resources(priv);
 }
 
 /**
@@ -1271,19 +1646,104 @@ static void free_dma_desc_resources(stru
  */
 static void stmmac_mac_enable_rx_queues(struct stmmac_priv *priv)
 {
-       int rx_count = priv->dma_cap.number_rx_queues;
-       int queue = 0;
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       int queue;
+       u8 mode;
 
-       /* If GMAC does not have multiple queues, then this is not necessary*/
-       if (rx_count == 1)
-               return;
+       for (queue = 0; queue < rx_queues_count; queue++) {
+               mode = priv->plat->rx_queues_cfg[queue].mode_to_use;
+               priv->hw->mac->rx_queue_enable(priv->hw, mode, queue);
+       }
+}
 
-       /**
-        *  If the core is synthesized with multiple rx queues / multiple
-        *  dma channels, then rx queues will be disabled by default.
-        *  For now only rx queue 0 is enabled.
-        */
-       priv->hw->mac->rx_queue_enable(priv->hw, queue);
+/**
+ * stmmac_start_rx_dma - start RX DMA channel
+ * @priv: driver private structure
+ * @chan: RX channel index
+ * Description:
+ * Emits a debug message and invokes the start_rx DMA callback for @chan
+ */
+static void stmmac_start_rx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       netdev_dbg(priv->dev, "DMA RX processes started in channel %u\n", chan);
+       priv->hw->dma->start_rx(priv->ioaddr, chan);
+}
+
+/**
+ * stmmac_start_tx_dma - start TX DMA channel
+ * @priv: driver private structure
+ * @chan: TX channel index
+ * Description:
+ * Emits a debug message and invokes the start_tx DMA callback for @chan
+ */
+static void stmmac_start_tx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       netdev_dbg(priv->dev, "DMA TX processes started in channel %u\n", chan);
+       priv->hw->dma->start_tx(priv->ioaddr, chan);
+}
+
+/**
+ * stmmac_stop_rx_dma - stop RX DMA channel
+ * @priv: driver private structure
+ * @chan: RX channel index
+ * Description:
+ * Emits a debug message and invokes the stop_rx DMA callback for @chan
+ */
+static void stmmac_stop_rx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       netdev_dbg(priv->dev, "DMA RX processes stopped in channel %u\n", chan);
+       priv->hw->dma->stop_rx(priv->ioaddr, chan);
+}
+
+/**
+ * stmmac_stop_tx_dma - stop TX DMA channel
+ * @priv: driver private structure
+ * @chan: TX channel index
+ * Description:
+ * Emits a debug message and invokes the stop_tx DMA callback for @chan
+ */
+static void stmmac_stop_tx_dma(struct stmmac_priv *priv, u32 chan)
+{
+       netdev_dbg(priv->dev, "DMA TX processes stopped in channel %u\n", chan);
+       priv->hw->dma->stop_tx(priv->ioaddr, chan);
+}
+
+/**
+ * stmmac_start_all_dma - start all RX and TX DMA channels
+ * @priv: driver private structure
+ * Description:
+ * This starts all the RX and TX DMA channels
+ */
+static void stmmac_start_all_dma(struct stmmac_priv *priv)
+{
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       u32 chan = 0;
+
+       for (chan = 0; chan < rx_channels_count; chan++)
+               stmmac_start_rx_dma(priv, chan);
+
+       for (chan = 0; chan < tx_channels_count; chan++)
+               stmmac_start_tx_dma(priv, chan);
+}
+
+/**
+ * stmmac_stop_all_dma - stop all RX and TX DMA channels
+ * @priv: driver private structure
+ * Description:
+ * This stops the RX and TX DMA channels
+ */
+static void stmmac_stop_all_dma(struct stmmac_priv *priv)
+{
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       u32 chan = 0;
+
+       for (chan = 0; chan < rx_channels_count; chan++)
+               stmmac_stop_rx_dma(priv, chan);
+
+       for (chan = 0; chan < tx_channels_count; chan++)
+               stmmac_stop_tx_dma(priv, chan);
 }
 
 /**
@@ -1294,11 +1754,20 @@ static void stmmac_mac_enable_rx_queues(
  */
 static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 {
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
        int rxfifosz = priv->plat->rx_fifo_size;
-
-       if (priv->plat->force_thresh_dma_mode)
-               priv->hw->dma->dma_mode(priv->ioaddr, tc, tc, rxfifosz);
-       else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) {
+       u32 txmode = 0;
+       u32 rxmode = 0;
+       u32 chan = 0;
+
+       if (rxfifosz == 0)
+               rxfifosz = priv->dma_cap.rx_fifo_size;
+
+       if (priv->plat->force_thresh_dma_mode) {
+               txmode = tc;
+               rxmode = tc;
+       } else if (priv->plat->force_sf_dma_mode || priv->plat->tx_coe) {
                /*
                 * In case of GMAC, SF mode can be enabled
                 * to perform the TX COE in HW. This depends on:
@@ -1306,37 +1775,53 @@ static void stmmac_dma_operation_mode(st
                 * 2) There is no bugged Jumbo frame support
                 *    that needs to not insert csum in the TDES.
                 */
-               priv->hw->dma->dma_mode(priv->ioaddr, SF_DMA_MODE, SF_DMA_MODE,
-                                       rxfifosz);
+               txmode = SF_DMA_MODE;
+               rxmode = SF_DMA_MODE;
                priv->xstats.threshold = SF_DMA_MODE;
-       } else
-               priv->hw->dma->dma_mode(priv->ioaddr, tc, SF_DMA_MODE,
+       } else {
+               txmode = tc;
+               rxmode = SF_DMA_MODE;
+       }
+
+       /* configure all channels */
+       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+               for (chan = 0; chan < rx_channels_count; chan++)
+                       priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
+                                                  rxfifosz);
+
+               for (chan = 0; chan < tx_channels_count; chan++)
+                       priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
+       } else {
+               priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
                                        rxfifosz);
+       }
 }
 
 /**
  * stmmac_tx_clean - to manage the transmission completion
  * @priv: driver private structure
+ * @queue: TX queue index
  * Description: it reclaims the transmit resources after transmission completes.
  */
-static void stmmac_tx_clean(struct stmmac_priv *priv)
+static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        unsigned int bytes_compl = 0, pkts_compl = 0;
-       unsigned int entry = priv->dirty_tx;
+       unsigned int entry = tx_q->dirty_tx;
 
        netif_tx_lock(priv->dev);
 
        priv->xstats.tx_clean++;
 
-       while (entry != priv->cur_tx) {
-               struct sk_buff *skb = priv->tx_skbuff[entry];
+       while (entry != tx_q->cur_tx) {
+               struct sk_buff *skb = tx_q->tx_skbuff[entry];
                struct dma_desc *p;
                int status;
 
                if (priv->extend_desc)
-                       p = (struct dma_desc *)(priv->dma_etx + entry);
+                       p = (struct dma_desc *)(tx_q->dma_etx + entry);
                else
-                       p = priv->dma_tx + entry;
+                       p = tx_q->dma_tx + entry;
 
                status = priv->hw->desc->tx_status(&priv->dev->stats,
                                                      &priv->xstats, p,
@@ -1362,48 +1847,51 @@ static void stmmac_tx_clean(struct stmma
                        stmmac_get_tx_hwtstamp(priv, p, skb);
                }
 
-               if (likely(priv->tx_skbuff_dma[entry].buf)) {
-                       if (priv->tx_skbuff_dma[entry].map_as_page)
+               if (likely(tx_q->tx_skbuff_dma[entry].buf)) {
+                       if (tx_q->tx_skbuff_dma[entry].map_as_page)
                                dma_unmap_page(priv->device,
-                                              priv->tx_skbuff_dma[entry].buf,
-                                              priv->tx_skbuff_dma[entry].len,
+                                              tx_q->tx_skbuff_dma[entry].buf,
+                                              tx_q->tx_skbuff_dma[entry].len,
                                               DMA_TO_DEVICE);
                        else
                                dma_unmap_single(priv->device,
-                                                priv->tx_skbuff_dma[entry].buf,
-                                                priv->tx_skbuff_dma[entry].len,
+                                                tx_q->tx_skbuff_dma[entry].buf,
+                                                tx_q->tx_skbuff_dma[entry].len,
                                                 DMA_TO_DEVICE);
-                       priv->tx_skbuff_dma[entry].buf = 0;
-                       priv->tx_skbuff_dma[entry].len = 0;
-                       priv->tx_skbuff_dma[entry].map_as_page = false;
+                       tx_q->tx_skbuff_dma[entry].buf = 0;
+                       tx_q->tx_skbuff_dma[entry].len = 0;
+                       tx_q->tx_skbuff_dma[entry].map_as_page = false;
                }
 
                if (priv->hw->mode->clean_desc3)
-                       priv->hw->mode->clean_desc3(priv, p);
+                       priv->hw->mode->clean_desc3(tx_q, p);
 
-               priv->tx_skbuff_dma[entry].last_segment = false;
-               priv->tx_skbuff_dma[entry].is_jumbo = false;
+               tx_q->tx_skbuff_dma[entry].last_segment = false;
+               tx_q->tx_skbuff_dma[entry].is_jumbo = false;
 
                if (likely(skb != NULL)) {
                        pkts_compl++;
                        bytes_compl += skb->len;
                        dev_consume_skb_any(skb);
-                       priv->tx_skbuff[entry] = NULL;
+                       tx_q->tx_skbuff[entry] = NULL;
                }
 
                priv->hw->desc->release_tx_desc(p, priv->mode);
 
                entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
        }
-       priv->dirty_tx = entry;
+       tx_q->dirty_tx = entry;
+
+       netdev_tx_completed_queue(netdev_get_tx_queue(priv->dev, queue),
+                                 pkts_compl, bytes_compl);
 
-       netdev_completed_queue(priv->dev, pkts_compl, bytes_compl);
+       if (unlikely(netif_tx_queue_stopped(netdev_get_tx_queue(priv->dev,
+                                                               queue))) &&
+           stmmac_tx_avail(priv, queue) > STMMAC_TX_THRESH) {
 
-       if (unlikely(netif_queue_stopped(priv->dev) &&
-           stmmac_tx_avail(priv) > STMMAC_TX_THRESH)) {
                netif_dbg(priv, tx_done, priv->dev,
                          "%s: restart transmit\n", __func__);
-               netif_wake_queue(priv->dev);
+               netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 
        if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) {
@@ -1413,45 +1901,76 @@ static void stmmac_tx_clean(struct stmma
        netif_tx_unlock(priv->dev);
 }
 
-static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
+static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv, u32 chan)
 {
-       priv->hw->dma->enable_dma_irq(priv->ioaddr);
+       priv->hw->dma->enable_dma_irq(priv->ioaddr, chan);
 }
 
-static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv)
+static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv, u32 chan)
 {
-       priv->hw->dma->disable_dma_irq(priv->ioaddr);
+       priv->hw->dma->disable_dma_irq(priv->ioaddr, chan);
 }
 
 /**
  * stmmac_tx_err - to manage the tx error
  * @priv: driver private structure
+ * @chan: channel index
  * Description: it cleans the descriptors and restarts the transmission
  * in case of transmission errors.
  */
-static void stmmac_tx_err(struct stmmac_priv *priv)
+static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
 {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
        int i;
-       netif_stop_queue(priv->dev);
 
-       priv->hw->dma->stop_tx(priv->ioaddr);
-       dma_free_tx_skbufs(priv);
+       netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, chan));
+
+       stmmac_stop_tx_dma(priv, chan);
+       dma_free_tx_skbufs(priv, chan);
        for (i = 0; i < DMA_TX_SIZE; i++)
                if (priv->extend_desc)
-                       priv->hw->desc->init_tx_desc(&priv->dma_etx[i].basic,
+                       priv->hw->desc->init_tx_desc(&tx_q->dma_etx[i].basic,
                                                     priv->mode,
                                                     (i == DMA_TX_SIZE - 1));
                else
-                       priv->hw->desc->init_tx_desc(&priv->dma_tx[i],
+                       priv->hw->desc->init_tx_desc(&tx_q->dma_tx[i],
                                                     priv->mode,
                                                     (i == DMA_TX_SIZE - 1));
-       priv->dirty_tx = 0;
-       priv->cur_tx = 0;
-       netdev_reset_queue(priv->dev);
-       priv->hw->dma->start_tx(priv->ioaddr);
+       tx_q->dirty_tx = 0;
+       tx_q->cur_tx = 0;
+       netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan));
+       stmmac_start_tx_dma(priv, chan);
 
        priv->dev->stats.tx_errors++;
-       netif_wake_queue(priv->dev);
+       netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, chan));
+}
+
+/**
+ *  stmmac_set_dma_operation_mode - Set DMA operation mode by channel
+ *  @priv: driver private structure
+ *  @txmode: TX operating mode
+ *  @rxmode: RX operating mode
+ *  @chan: channel index
+ *  Description: it is used for configuring of the DMA operation mode in
+ *  runtime in order to program the tx/rx DMA thresholds or Store-And-Forward
+ *  mode.
+ */
+static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
+                                         u32 rxmode, u32 chan)
+{
+       int rxfifosz = priv->plat->rx_fifo_size;
+
+       if (rxfifosz == 0)
+               rxfifosz = priv->dma_cap.rx_fifo_size;
+
+       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+               priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
+                                          rxfifosz);
+               priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
+       } else {
+               priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
+                                       rxfifosz);
+       }
 }
 
 /**
@@ -1463,31 +1982,43 @@ static void stmmac_tx_err(struct stmmac_
  */
 static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 {
+       u32 tx_channel_count = priv->plat->tx_queues_to_use;
        int status;
-       int rxfifosz = priv->plat->rx_fifo_size;
+       u32 chan;
+       /* NOTE(review): bounded by TX count but indexes rx_queue[] - confirm */
+       for (chan = 0; chan < tx_channel_count; chan++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
 
-       status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
-       if (likely((status & handle_rx)) || (status & handle_tx)) {
-               if (likely(napi_schedule_prep(&priv->napi))) {
-                       stmmac_disable_dma_irq(priv);
-                       __napi_schedule(&priv->napi);
+               status = priv->hw->dma->dma_interrupt(priv->ioaddr,
+                                                     &priv->xstats, chan);
+               if (likely((status & handle_rx)) || (status & handle_tx)) {
+                       if (likely(napi_schedule_prep(&rx_q->napi))) {
+                               stmmac_disable_dma_irq(priv, chan);
+                               __napi_schedule(&rx_q->napi);
+                       }
                }
-       }
-       if (unlikely(status & tx_hard_error_bump_tc)) {
-               /* Try to bump up the dma threshold on this failure */
-               if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
-                   (tc <= 256)) {
-                       tc += 64;
-                       if (priv->plat->force_thresh_dma_mode)
-                               priv->hw->dma->dma_mode(priv->ioaddr, tc, tc,
-                                                       rxfifosz);
-                       else
-                               priv->hw->dma->dma_mode(priv->ioaddr, tc,
-                                                       SF_DMA_MODE, rxfifosz);
-                       priv->xstats.threshold = tc;
+
+               if (unlikely(status & tx_hard_error_bump_tc)) {
+                       /* Try to bump up the dma threshold on this failure */
+                       if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
+                           (tc <= 256)) {
+                               tc += 64;
+                               if (priv->plat->force_thresh_dma_mode)
+                                       stmmac_set_dma_operation_mode(priv,
+                                                                     tc,
+                                                                     tc,
+                                                                     chan);
+                               else
+                                       stmmac_set_dma_operation_mode(priv,
+                                                                   tc,
+                                                                   SF_DMA_MODE,
+                                                                   chan);
+                               priv->xstats.threshold = tc;
+                       }
+               } else if (unlikely(status == tx_hard_error)) {
+                       stmmac_tx_err(priv, chan);
                }
-       } else if (unlikely(status == tx_hard_error))
-               stmmac_tx_err(priv);
+       }
 }
 
 /**
@@ -1594,6 +2125,13 @@ static void stmmac_check_ether_addr(stru
  */
 static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 {
+       u32 rx_channels_count = priv->plat->rx_queues_to_use;
+       u32 tx_channels_count = priv->plat->tx_queues_to_use;
+       struct stmmac_rx_queue *rx_q;
+       struct stmmac_tx_queue *tx_q;
+       u32 dummy_dma_rx_phy = 0;
+       u32 dummy_dma_tx_phy = 0;
+       u32 chan = 0;
        int atds = 0;
        int ret = 0;
 
@@ -1611,19 +2149,49 @@ static int stmmac_init_dma_engine(struct
                return ret;
        }
 
-       priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
-                           priv->dma_tx_phy, priv->dma_rx_phy, atds);
-
        if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-               priv->rx_tail_addr = priv->dma_rx_phy +
-                           (DMA_RX_SIZE * sizeof(struct dma_desc));
-               priv->hw->dma->set_rx_tail_ptr(priv->ioaddr, priv->rx_tail_addr,
-                                              STMMAC_CHAN0);
-
-               priv->tx_tail_addr = priv->dma_tx_phy +
-                           (DMA_TX_SIZE * sizeof(struct dma_desc));
-               priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
-                                              STMMAC_CHAN0);
+               /* DMA Configuration */
+               priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
+                                   dummy_dma_tx_phy, dummy_dma_rx_phy, atds);
+
+               /* DMA RX Channel Configuration */
+               for (chan = 0; chan < rx_channels_count; chan++) {
+                       rx_q = &priv->rx_queue[chan];
+
+                       priv->hw->dma->init_rx_chan(priv->ioaddr,
+                                                   priv->plat->dma_cfg,
+                                                   rx_q->dma_rx_phy, chan);
+
+                       rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+                                   (DMA_RX_SIZE * sizeof(struct dma_desc));
+                       priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
+                                                      rx_q->rx_tail_addr,
+                                                      chan);
+               }
+
+               /* DMA TX Channel Configuration */
+               for (chan = 0; chan < tx_channels_count; chan++) {
+                       tx_q = &priv->tx_queue[chan];
+
+                       priv->hw->dma->init_chan(priv->ioaddr,
+                                                priv->plat->dma_cfg,
+                                                chan);
+
+                       priv->hw->dma->init_tx_chan(priv->ioaddr,
+                                                   priv->plat->dma_cfg,
+                                                   tx_q->dma_tx_phy, chan);
+
+                       tx_q->tx_tail_addr = tx_q->dma_tx_phy +
+                                   (DMA_TX_SIZE * sizeof(struct dma_desc));
+                       priv->hw->dma->set_tx_tail_ptr(priv->ioaddr,
+                                                      tx_q->tx_tail_addr,
+                                                      chan);
+               }
+       } else {
+               rx_q = &priv->rx_queue[chan];
+               tx_q = &priv->tx_queue[chan];
+               priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
+                                   tx_q->dma_tx_phy, rx_q->dma_rx_phy, atds);
        }
 
        if (priv->plat->axi && priv->hw->dma->axi)
@@ -1641,8 +2209,12 @@ static int stmmac_init_dma_engine(struct
 static void stmmac_tx_timer(unsigned long data)
 {
        struct stmmac_priv *priv = (struct stmmac_priv *)data;
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
+       u32 queue;
 
-       stmmac_tx_clean(priv);
+       /* let's scan all the tx queues */
+       for (queue = 0; queue < tx_queues_count; queue++)
+               stmmac_tx_clean(priv, queue);
 }
 
 /**
@@ -1664,6 +2236,196 @@ static void stmmac_init_tx_coalesce(stru
        add_timer(&priv->txtimer);
 }
 
+/* stmmac_set_rings_length - program the DMA descriptor ring lengths
+ * Passes DMA_TX_SIZE - 1 / DMA_RX_SIZE - 1 for every TX / RX channel in
+ * use. A direction whose set_*_ring_len callback is NULL is skipped.
+ */
+static void stmmac_set_rings_length(struct stmmac_priv *priv)
+{
+       u32 nr_rx = priv->plat->rx_queues_to_use;
+       u32 nr_tx = priv->plat->tx_queues_to_use;
+       u32 ch;
+
+       if (priv->hw->dma->set_tx_ring_len)
+               for (ch = 0; ch < nr_tx; ch++)
+                       priv->hw->dma->set_tx_ring_len(priv->ioaddr,
+                                                      DMA_TX_SIZE - 1, ch);
+
+       if (priv->hw->dma->set_rx_ring_len)
+               for (ch = 0; ch < nr_rx; ch++)
+                       priv->hw->dma->set_rx_ring_len(priv->ioaddr,
+                                                      DMA_RX_SIZE - 1, ch);
+}
+
+/**
+ *  stmmac_set_tx_queue_weight - program the weight of each TX queue
+ *  @priv: driver private structure
+ *  Description: passes tx_queues_cfg[].weight of every TX queue in use to
+ *  the set_mtl_tx_queue_weight callback, which is not NULL-checked here.
+ */
+static void stmmac_set_tx_queue_weight(struct stmmac_priv *priv)
+{
+       u32 nr_txq = priv->plat->tx_queues_to_use;
+       u32 q;
+
+       for (q = 0; q < nr_txq; q++)
+               priv->hw->mac->set_mtl_tx_queue_weight(priv->hw,
+                                       priv->plat->tx_queues_cfg[q].weight,
+                                       q);
+}
+
+/**
+ *  stmmac_configure_cbs - Configure CBS in TX queue
+ *  @priv: driver private structure
+ *  Description: hands the slope and credit values of each TX queue whose
+ *  mode is not MTL_QUEUE_DCB to the config_cbs callback.
+ */
+static void stmmac_configure_cbs(struct stmmac_priv *priv)
+{
+       u32 nr_txq = priv->plat->tx_queues_to_use;
+       u32 q;
+
+       /* queue 0 is reserved for legacy traffic, so start at queue 1 */
+       for (q = 1; q < nr_txq; q++) {
+               u32 q_mode = priv->plat->tx_queues_cfg[q].mode_to_use;
+
+               /* DCB queues get no CBS configuration */
+               if (q_mode != MTL_QUEUE_DCB)
+                       priv->hw->mac->config_cbs(priv->hw,
+                               priv->plat->tx_queues_cfg[q].send_slope,
+                               priv->plat->tx_queues_cfg[q].idle_slope,
+                               priv->plat->tx_queues_cfg[q].high_credit,
+                               priv->plat->tx_queues_cfg[q].low_credit,
+                               q);
+       }
+}
+
+/**
+ *  stmmac_rx_queue_dma_chan_map - Map RX queue to RX dma channel
+ *  @priv: driver private structure
+ *  Description: maps each RX queue in use to its rx_queues_cfg[].chan
+ */
+static void stmmac_rx_queue_dma_chan_map(struct stmmac_priv *priv)
+{
+       u32 nr_rxq = priv->plat->rx_queues_to_use;
+       u32 q;
+
+       for (q = 0; q < nr_rxq; q++) {
+               u32 dma_chan = priv->plat->rx_queues_cfg[q].chan;
+
+               priv->hw->mac->map_mtl_to_dma(priv->hw, q, dma_chan);
+       }
+}
+
+/**
+ *  stmmac_mac_config_rx_queues_prio - Configure RX Queue priority
+ *  @priv: driver private structure
+ *  Description: passes rx_queues_cfg[].prio of every RX queue in use that
+ *  has use_prio set to the rx_queue_prio callback; others are skipped.
+ */
+static void stmmac_mac_config_rx_queues_prio(struct stmmac_priv *priv)
+{
+       u32 nr_rxq = priv->plat->rx_queues_to_use;
+       u32 q;
+
+       for (q = 0; q < nr_rxq; q++) {
+               /* only queues with use_prio set get a priority programmed */
+               if (priv->plat->rx_queues_cfg[q].use_prio)
+                       priv->hw->mac->rx_queue_prio(priv->hw,
+                                       priv->plat->rx_queues_cfg[q].prio,
+                                       q);
+       }
+}
+
+/**
+ *  stmmac_mac_config_tx_queues_prio - Configure TX Queue priority
+ *  @priv: driver private structure
+ *  Description: passes tx_queues_cfg[].prio of every TX queue in use that
+ *  has use_prio set to the tx_queue_prio callback; others are skipped.
+ */
+static void stmmac_mac_config_tx_queues_prio(struct stmmac_priv *priv)
+{
+       u32 nr_txq = priv->plat->tx_queues_to_use;
+       u32 q;
+
+       for (q = 0; q < nr_txq; q++) {
+               /* only queues with use_prio set get a priority programmed */
+               if (priv->plat->tx_queues_cfg[q].use_prio)
+                       priv->hw->mac->tx_queue_prio(priv->hw,
+                                       priv->plat->tx_queues_cfg[q].prio,
+                                       q);
+       }
+}
+
+/**
+ *  stmmac_mac_config_rx_queues_routing - Configure RX Queue Routing
+ *  @priv: driver private structure
+ *  Description: programs rx_queues_cfg[].pkt_route of each RX queue in use
+ */
+static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
+{
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+       u8 packet;
+
+       for (queue = 0; queue < rx_queues_count; queue++) {
+               /* no specific packet type routing specified for the queue */
+               if (priv->plat->rx_queues_cfg[queue].pkt_route == 0x0)
+                       continue;
+               /* routing hook (NULL-checked by the caller), not the prio one */
+               packet = priv->plat->rx_queues_cfg[queue].pkt_route;
+               priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
+       }
+}
+
+/**
+ *  stmmac_mtl_configuration - Configure MTL
+ *  @priv: driver private structure
+ *  Description: runs the MTL setup steps whose MAC callbacks are provided
+ */
+static void stmmac_mtl_configuration(struct stmmac_priv *priv)
+{
+       u32 rx_queues_count = priv->plat->rx_queues_to_use;
+       u32 tx_queues_count = priv->plat->tx_queues_to_use;
+
+       if (tx_queues_count > 1 && priv->hw->mac->set_mtl_tx_queue_weight)
+               stmmac_set_tx_queue_weight(priv);
+
+       /* Configure MTL RX algorithms (only with more than one RX queue) */
+       if (rx_queues_count > 1 && priv->hw->mac->prog_mtl_rx_algorithms)
+               priv->hw->mac->prog_mtl_rx_algorithms(priv->hw,
+                                               priv->plat->rx_sched_algorithm);
+
+       /* Configure MTL TX algorithms (only with more than one TX queue) */
+       if (tx_queues_count > 1 && priv->hw->mac->prog_mtl_tx_algorithms)
+               priv->hw->mac->prog_mtl_tx_algorithms(priv->hw,
+                                               priv->plat->tx_sched_algorithm);
+
+       /* Configure CBS in AVB TX queues (only with more than one TX queue) */
+       if (tx_queues_count > 1 && priv->hw->mac->config_cbs)
+               stmmac_configure_cbs(priv);
+
+       /* Map RX MTL to DMA channels, regardless of the queue count */
+       if (priv->hw->mac->map_mtl_to_dma)
+               stmmac_rx_queue_dma_chan_map(priv);
+
+       /* Enable MAC RX Queues, regardless of the queue count */
+       if (priv->hw->mac->rx_queue_enable)
+               stmmac_mac_enable_rx_queues(priv);
+
+       /* Set RX priorities (only with more than one RX queue) */
+       if (rx_queues_count > 1 && priv->hw->mac->rx_queue_prio)
+               stmmac_mac_config_rx_queues_prio(priv);
+
+       /* Set TX priorities (only with more than one TX queue) */
+       if (tx_queues_count > 1 && priv->hw->mac->tx_queue_prio)
+               stmmac_mac_config_tx_queues_prio(priv);
+
+       /* Set RX routing (only with more than one RX queue) */
+       if (rx_queues_count > 1 && priv->hw->mac->rx_queue_routing)
+               stmmac_mac_config_rx_queues_routing(priv);
+}
+
 /**
  * stmmac_hw_setup - setup mac in a usable state.
  *  @dev : pointer to the device structure.
@@ -1679,6 +2441,9 @@ static void stmmac_init_tx_coalesce(stru
 static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 chan;
        int ret;
 
        /* DMA initialization and SW reset */
@@ -1708,9 +2473,9 @@ static int stmmac_hw_setup(struct net_de
        /* Initialize the MAC Core */
        priv->hw->mac->core_init(priv->hw, dev->mtu);
 
-       /* Initialize MAC RX Queues */
-       if (priv->hw->mac->rx_queue_enable)
-               stmmac_mac_enable_rx_queues(priv);
+       /* Initialize MTL*/
+       if (priv->synopsys_id >= DWMAC_CORE_4_00)
+               stmmac_mtl_configuration(priv);
 
        ret = priv->hw->mac->rx_ipc(priv->hw);
        if (!ret) {
@@ -1720,10 +2485,7 @@ static int stmmac_hw_setup(struct net_de
        }
 
        /* Enable the MAC Rx/Tx */
-       if (priv->synopsys_id >= DWMAC_CORE_4_00)
-               stmmac_dwmac4_set_mac(priv->ioaddr, true);
-       else
-               stmmac_set_mac(priv->ioaddr, true);
+       priv->hw->mac->set_mac(priv->ioaddr, true);
 
        /* Set the HW DMA mode and the COE */
        stmmac_dma_operation_mode(priv);
@@ -1731,6 +2493,10 @@ static int stmmac_hw_setup(struct net_de
        stmmac_mmc_setup(priv);
 
        if (init_ptp) {
+               ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+               if (ret < 0)
+                       netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret);
+
                ret = stmmac_init_ptp(priv);
                if (ret == -EOPNOTSUPP)
                        netdev_warn(priv->dev, "PTP not supported by HW\n");
@@ -1745,35 +2511,37 @@ static int stmmac_hw_setup(struct net_de
                            __func__);
 #endif
        /* Start the ball rolling... */
-       netdev_dbg(priv->dev, "DMA RX/TX processes started...\n");
-       priv->hw->dma->start_tx(priv->ioaddr);
-       priv->hw->dma->start_rx(priv->ioaddr);
+       stmmac_start_all_dma(priv);
 
        priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
 
        if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) {
                priv->rx_riwt = MAX_DMA_RIWT;
-               priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT);
+               priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
        }
 
        if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane)
                priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0);
 
-       /*  set TX ring length */
-       if (priv->hw->dma->set_tx_ring_len)
-               priv->hw->dma->set_tx_ring_len(priv->ioaddr,
-                                              (DMA_TX_SIZE - 1));
-       /*  set RX ring length */
-       if (priv->hw->dma->set_rx_ring_len)
-               priv->hw->dma->set_rx_ring_len(priv->ioaddr,
-                                              (DMA_RX_SIZE - 1));
+       /* set TX and RX rings length */
+       stmmac_set_rings_length(priv);
+
        /* Enable TSO */
-       if (priv->tso)
-               priv->hw->dma->enable_tso(priv->ioaddr, 1, STMMAC_CHAN0);
+       if (priv->tso) {
+               for (chan = 0; chan < tx_cnt; chan++)
+                       priv->hw->dma->enable_tso(priv->ioaddr, 1, chan);
+       }
 
        return 0;
 }
 
+static void stmmac_hw_teardown(struct net_device *dev)
+{
+       struct stmmac_priv *priv = netdev_priv(dev);
+       /* Undo the clk_prepare_enable() done in stmmac_hw_setup() */
+       clk_disable_unprepare(priv->plat->clk_ptp_ref);
+}
+
 /**
  *  stmmac_open - open entry point of the driver
  *  @dev : pointer to the device structure.
@@ -1842,7 +2610,7 @@ static int stmmac_open(struct net_device
                netdev_err(priv->dev,
                           "%s: ERROR: allocating the IRQ %d (error: %d)\n",
                           __func__, dev->irq, ret);
-               goto init_error;
+               goto irq_error;
        }
 
        /* Request the Wake IRQ in case of another line is used for WoL */
@@ -1869,8 +2637,8 @@ static int stmmac_open(struct net_device
                }
        }
 
-       napi_enable(&priv->napi);
-       netif_start_queue(dev);
+       stmmac_enable_all_queues(priv);
+       stmmac_start_all_queues(priv);
 
        return 0;
 
@@ -1879,7 +2647,12 @@ lpiirq_error:
                free_irq(priv->wol_irq, dev);
 wolirq_error:
        free_irq(dev->irq, dev);
+irq_error:
+       if (dev->phydev)
+               phy_stop(dev->phydev);
 
+       del_timer_sync(&priv->txtimer);
+       stmmac_hw_teardown(dev);
 init_error:
        free_dma_desc_resources(priv);
 dma_desc_error:
@@ -1908,9 +2681,9 @@ static int stmmac_release(struct net_dev
                phy_disconnect(dev->phydev);
        }
 
-       netif_stop_queue(dev);
+       stmmac_stop_all_queues(priv);
 
-       napi_disable(&priv->napi);
+       stmmac_disable_all_queues(priv);
 
        del_timer_sync(&priv->txtimer);
 
@@ -1922,14 +2695,13 @@ static int stmmac_release(struct net_dev
                free_irq(priv->lpi_irq, dev);
 
        /* Stop TX/RX DMA and clear the descriptors */
-       priv->hw->dma->stop_tx(priv->ioaddr);
-       priv->hw->dma->stop_rx(priv->ioaddr);
+       stmmac_stop_all_dma(priv);
 
        /* Release and free the Rx/Tx resources */
        free_dma_desc_resources(priv);
 
        /* Disable the MAC Rx/Tx */
-       stmmac_set_mac(priv->ioaddr, false);
+       priv->hw->mac->set_mac(priv->ioaddr, false);
 
        netif_carrier_off(dev);
 
@@ -1948,22 +2720,24 @@ static int stmmac_release(struct net_dev
  *  @des: buffer start address
  *  @total_len: total length to fill in descriptors
  *  @last_segmant: condition for the last descriptor
+ *  @queue: TX queue index
  *  Description:
  *  This function fills descriptor and request new descriptors according to
  *  buffer length to fill
  */
 static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
-                                int total_len, bool last_segment)
+                                int total_len, bool last_segment, u32 queue)
 {
+       struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
        struct dma_desc *desc;
-       int tmp_len;
        u32 buff_size;
+       int tmp_len;
 
        tmp_len = total_len;
 
        while (tmp_len > 0) {
-               priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
-               desc = priv->dma_tx + priv->cur_tx;
+               tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+               desc = tx_q->dma_tx + tx_q->cur_tx;
 
                desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
                buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ?
@@ -1971,7 +2745,7 @@ static void stmmac_tso_allocator(struct
 
                priv->hw->desc->prepare_tso_tx_desc(desc, 0, buff_size,
                        0, 1,
-                       (last_segment) && (buff_size < TSO_MAX_BUFF_SIZE),
+                       (last_segment) && (tmp_len <= TSO_MAX_BUFF_SIZE),
                        0, 0);
 
                tmp_len -= TSO_MAX_BUFF_SIZE;
@@ -2007,23 +2781,28 @@ static void stmmac_tso_allocator(struct
  */
 static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-       u32 pay_len, mss;
-       int tmp_pay_len = 0;
+       struct dma_desc *desc, *first, *mss_desc = NULL;
        struct stmmac_priv *priv = netdev_priv(dev);
        int nfrags = skb_shinfo(skb)->nr_frags;
+       u32 queue = skb_get_queue_mapping(skb);
        unsigned int first_entry, des;
-       struct dma_desc *desc, *first, *mss_desc = NULL;
+       struct stmmac_tx_queue *tx_q;
+       int tmp_pay_len = 0;
+       u32 pay_len, mss;
        u8 proto_hdr_len;
        int i;
 
+       tx_q = &priv->tx_queue[queue];
+
        /* Compute header lengths */
        proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
        /* Desc availability based on threshold should be enough safe */
-       if (unlikely(stmmac_tx_avail(priv) <
+       if (unlikely(stmmac_tx_avail(priv, queue) <
                (((skb->len - proto_hdr_len) / TSO_MAX_BUFF_SIZE + 1)))) {
-               if (!netif_queue_stopped(dev)) {
-                       netif_stop_queue(dev);
+               if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, queue))) {
+                       netif_tx_stop_queue(netdev_get_tx_queue(priv->dev,
+                                                               queue));
                        /* This is a hard error, log it. */
                        netdev_err(priv->dev,
                                   "%s: Tx Ring full when queue awake\n",
@@ -2038,10 +2817,10 @@ static netdev_tx_t stmmac_tso_xmit(struc
 
        /* set new MSS value if needed */
        if (mss != priv->mss) {
-               mss_desc = priv->dma_tx + priv->cur_tx;
+               mss_desc = tx_q->dma_tx + tx_q->cur_tx;
                priv->hw->desc->set_mss(mss_desc, mss);
                priv->mss = mss;
-               priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
+               tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
        }
 
        if (netif_msg_tx_queued(priv)) {
@@ -2051,9 +2830,9 @@ static netdev_tx_t stmmac_tso_xmit(struc
                        skb->data_len);
        }
 
-       first_entry = priv->cur_tx;
+       first_entry = tx_q->cur_tx;
 
-       desc = priv->dma_tx + first_entry;
+       desc = tx_q->dma_tx + first_entry;
        first = desc;
 
        /* first descriptor: fill Headers on Buf1 */
@@ -2062,9 +2841,8 @@ static netdev_tx_t stmmac_tso_xmit(struc
        if (dma_mapping_error(priv->device, des))
                goto dma_map_err;
 
-       priv->tx_skbuff_dma[first_entry].buf = des;
-       priv->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
-       priv->tx_skbuff[first_entry] = skb;
+       tx_q->tx_skbuff_dma[first_entry].buf = des;
+       tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
 
        first->des0 = cpu_to_le32(des);
 
@@ -2075,7 +2853,7 @@ static netdev_tx_t stmmac_tso_xmit(struc
        /* If needed take extra descriptors to fill the remaining payload */
        tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
 
-       stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0));
+       stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
 
        /* Prepare fragments */
        for (i = 0; i < nfrags; i++) {
@@ -2084,24 +2862,34 @@ static netdev_tx_t stmmac_tso_xmit(struc
                des = skb_frag_dma_map(priv->device, frag, 0,
                                       skb_frag_size(frag),
                                       DMA_TO_DEVICE);
+               if (dma_mapping_error(priv->device, des))
+                       goto dma_map_err;
 
                stmmac_tso_allocator(priv, des, skb_frag_size(frag),
-                                    (i == nfrags - 1));
+                                    (i == nfrags - 1), queue);
 
-               priv->tx_skbuff_dma[priv->cur_tx].buf = des;
-               priv->tx_skbuff_dma[priv->cur_tx].len = skb_frag_size(frag);
-               priv->tx_skbuff[priv->cur_tx] = NULL;
-               priv->tx_skbuff_dma[priv->cur_tx].map_as_page = true;
+               tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
+               tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
+               tx_q->tx_skbuff[tx_q->cur_tx] = NULL;
+               tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
        }
 
-       priv->tx_skbuff_dma[priv->cur_tx].last_segment = true;
+       tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
+
+       /* Only the last descriptor gets to point to the skb. */
+       tx_q->tx_skbuff[tx_q->cur_tx] = skb;
 
-       priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
+       /* We've used all descriptors we need for this skb, however,
+        * advance cur_tx so that it references a fresh descriptor.
+        * ndo_start_xmit will fill this descriptor the next time it's
+        * called and stmmac_tx_clean may clean up to this descriptor.
+        */
+       tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
 
-       if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
+       if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) {
                netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n",
                          __func__);
-               netif_stop_queue(dev);
+               netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 
        dev->stats.tx_bytes += skb->len;
@@ -2133,7 +2921,7 @@ static netdev_tx_t stmmac_tso_xmit(struc
        priv->hw->desc->prepare_tso_tx_desc(first, 1,
                        proto_hdr_len,
                        pay_len,
-                       1, priv->tx_skbuff_dma[first_entry].last_segment,
+                       1, tx_q->tx_skbuff_dma[first_entry].last_segment,
                        tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
 
        /* If context desc is used to change MSS */
@@ -2155,20 +2943,20 @@ static netdev_tx_t stmmac_tso_xmit(struc
 
        if (netif_msg_pktdata(priv)) {
                pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
-                       __func__, priv->cur_tx, priv->dirty_tx, first_entry,
-                       priv->cur_tx, first, nfrags);
+                       __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry,
+                       tx_q->cur_tx, first, nfrags);
 
-               priv->hw->desc->display_ring((void *)priv->dma_tx, DMA_TX_SIZE,
+               priv->hw->desc->display_ring((void *)tx_q->dma_tx, DMA_TX_SIZE,
                                             0);
 
                pr_info(">>> frame to be transmitted: ");
                print_pkt(skb->data, skb_headlen(skb));
        }
 
-       netdev_sent_queue(dev, skb->len);
+       netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
-       priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
-                                      STMMAC_CHAN0);
+       priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr,
+                                      queue);
 
        return NETDEV_TX_OK;
 
@@ -2192,21 +2980,27 @@ static netdev_tx_t stmmac_xmit(struct sk
        struct stmmac_priv *priv = netdev_priv(dev);
        unsigned int nopaged_len = skb_headlen(skb);
        int i, csum_insertion = 0, is_jumbo = 0;
+       u32 queue = skb_get_queue_mapping(skb);
        int nfrags = skb_shinfo(skb)->nr_frags;
-       unsigned int entry, first_entry;
+       int entry;
+       unsigned int first_entry;
        struct dma_desc *desc, *first;
+       struct stmmac_tx_queue *tx_q;
        unsigned int enh_desc;
        unsigned int des;
 
+       tx_q = &priv->tx_queue[queue];
+
        /* Manage oversized TCP frames for GMAC4 device */
        if (skb_is_gso(skb) && priv->tso) {
                if (ip_hdr(skb)->protocol == IPPROTO_TCP)
                        return stmmac_tso_xmit(skb, dev);
        }
 
-       if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
-               if (!netif_queue_stopped(dev)) {
-                       netif_stop_queue(dev);
+       if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) {
+               if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, queue))) {
+                       netif_tx_stop_queue(netdev_get_tx_queue(priv->dev,
+                                                               queue));
                        /* This is a hard error, log it. */
                        netdev_err(priv->dev,
                                   "%s: Tx Ring full when queue awake\n",
@@ -2218,20 +3012,18 @@ static netdev_tx_t stmmac_xmit(struct sk
        if (priv->tx_path_in_lpi_mode)
                stmmac_disable_eee_mode(priv);
 
-       entry = priv->cur_tx;
+       entry = tx_q->cur_tx;
        first_entry = entry;
 
        csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
 
        if (likely(priv->extend_desc))
-               desc = (struct dma_desc *)(priv->dma_etx + entry);
+               desc = (struct dma_desc *)(tx_q->dma_etx + entry);
        else
-               desc = priv->dma_tx + entry;
+               desc = tx_q->dma_tx + entry;
 
        first = desc;
 
-       priv->tx_skbuff[first_entry] = skb;
-
        enh_desc = priv->plat->enh_desc;
        /* To program the descriptors according to the size of the frame */
        if (enh_desc)
@@ -2239,7 +3031,7 @@ static netdev_tx_t stmmac_xmit(struct sk
 
        if (unlikely(is_jumbo) && likely(priv->synopsys_id <
                                         DWMAC_CORE_4_00)) {
-               entry = priv->hw->mode->jumbo_frm(priv, skb, csum_insertion);
+               entry = priv->hw->mode->jumbo_frm(tx_q, skb, csum_insertion);
                if (unlikely(entry < 0))
                        goto dma_map_err;
        }
@@ -2252,48 +3044,56 @@ static netdev_tx_t stmmac_xmit(struct sk
                entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
 
                if (likely(priv->extend_desc))
-                       desc = (struct dma_desc *)(priv->dma_etx + entry);
+                       desc = (struct dma_desc *)(tx_q->dma_etx + entry);
                else
-                       desc = priv->dma_tx + entry;
+                       desc = tx_q->dma_tx + entry;
 
                des = skb_frag_dma_map(priv->device, frag, 0, len,
                                       DMA_TO_DEVICE);
                if (dma_mapping_error(priv->device, des))
                        goto dma_map_err; /* should reuse desc w/o issues */
 
-               priv->tx_skbuff[entry] = NULL;
+               tx_q->tx_skbuff[entry] = NULL;
 
-               priv->tx_skbuff_dma[entry].buf = des;
+               tx_q->tx_skbuff_dma[entry].buf = des;
                if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
                        desc->des0 = cpu_to_le32(des);
                else
                        desc->des2 = cpu_to_le32(des);
 
-               priv->tx_skbuff_dma[entry].map_as_page = true;
-               priv->tx_skbuff_dma[entry].len = len;
-               priv->tx_skbuff_dma[entry].last_segment = last_segment;
+               tx_q->tx_skbuff_dma[entry].map_as_page = true;
+               tx_q->tx_skbuff_dma[entry].len = len;
+               tx_q->tx_skbuff_dma[entry].last_segment = last_segment;
 
                /* Prepare the descriptor and set the own bit too */
                priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion,
-                                               priv->mode, 1, last_segment);
+                                               priv->mode, 1, last_segment,
+                                               skb->len);
        }
 
-       entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+       /* Only the last descriptor gets to point to the skb. */
+       tx_q->tx_skbuff[entry] = skb;
 
-       priv->cur_tx = entry;
+       /* We've used all descriptors we need for this skb, however,
+        * advance cur_tx so that it references a fresh descriptor.
+        * ndo_start_xmit will fill this descriptor the next time it's
+        * called and stmmac_tx_clean may clean up to this descriptor.
+        */
+       entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+       tx_q->cur_tx = entry;
 
        if (netif_msg_pktdata(priv)) {
                void *tx_head;
 
                netdev_dbg(priv->dev,
                           "%s: curr=%d dirty=%d f=%d, e=%d, first=%p, nfrags=%d",
-                          __func__, priv->cur_tx, priv->dirty_tx, first_entry,
+                          __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry,
                           entry, first, nfrags);
 
                if (priv->extend_desc)
-                       tx_head = (void *)priv->dma_etx;
+                       tx_head = (void *)tx_q->dma_etx;
                else
-                       tx_head = (void *)priv->dma_tx;
+                       tx_head = (void *)tx_q->dma_tx;
 
                priv->hw->desc->display_ring(tx_head, DMA_TX_SIZE, false);
 
@@ -2301,10 +3101,10 @@ static netdev_tx_t stmmac_xmit(struct sk
                print_pkt(skb->data, skb->len);
        }
 
-       if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
+       if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) {
                netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n",
                          __func__);
-               netif_stop_queue(dev);
+               netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 
        dev->stats.tx_bytes += skb->len;
@@ -2339,14 +3139,14 @@ static netdev_tx_t stmmac_xmit(struct sk
                if (dma_mapping_error(priv->device, des))
                        goto dma_map_err;
 
-               priv->tx_skbuff_dma[first_entry].buf = des;
+               tx_q->tx_skbuff_dma[first_entry].buf = des;
                if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
                        first->des0 = cpu_to_le32(des);
                else
                        first->des2 = cpu_to_le32(des);
 
-               priv->tx_skbuff_dma[first_entry].len = nopaged_len;
-               priv->tx_skbuff_dma[first_entry].last_segment = last_segment;
+               tx_q->tx_skbuff_dma[first_entry].len = nopaged_len;
+               tx_q->tx_skbuff_dma[first_entry].last_segment = last_segment;
 
                if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
                             priv->hwts_tx_en)) {
@@ -2358,7 +3158,7 @@ static netdev_tx_t stmmac_xmit(struct sk
                /* Prepare the first descriptor setting the OWN bit too */
                priv->hw->desc->prepare_tx_desc(first, 1, nopaged_len,
                                                csum_insertion, priv->mode, 1,
-                                               last_segment);
+                                               last_segment, skb->len);
 
                /* The own bit must be the latest setting done when prepare the
                 * descriptor and then barrier is needed to make sure that
@@ -2367,13 +3167,13 @@ static netdev_tx_t stmmac_xmit(struct sk
                dma_wmb();
        }
 
-       netdev_sent_queue(dev, skb->len);
+       netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
        if (priv->synopsys_id < DWMAC_CORE_4_00)
                priv->hw->dma->enable_dma_transmission(priv->ioaddr);
        else
-               priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
-                                              STMMAC_CHAN0);
+               priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, tx_q->tx_tail_addr,
+                                              queue);
 
        return NETDEV_TX_OK;
 
@@ -2401,9 +3201,9 @@ static void stmmac_rx_vlan(struct net_de
 }
 
 
-static inline int stmmac_rx_threshold_count(struct stmmac_priv *priv)
+static inline int stmmac_rx_threshold_count(struct stmmac_rx_queue *rx_q)
 {
-       if (priv->rx_zeroc_thresh < STMMAC_RX_THRESH)
+       if (rx_q->rx_zeroc_thresh < STMMAC_RX_THRESH)
                return 0;
 
        return 1;
@@ -2412,30 +3212,33 @@ static inline int stmmac_rx_threshold_co
 /**
  * stmmac_rx_refill - refill used skb preallocated buffers
  * @priv: driver private structure
+ * @queue: RX queue index
  * Description : this is to reallocate the skb for the reception process
  * that is based on zero-copy.
  */
-static inline void stmmac_rx_refill(struct stmmac_priv *priv)
+static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 {
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       int dirty = stmmac_rx_dirty(priv, queue);
+       unsigned int entry = rx_q->dirty_rx;
+
        int bfsize = priv->dma_buf_sz;
-       unsigned int entry = priv->dirty_rx;
-       int dirty = stmmac_rx_dirty(priv);
 
        while (dirty-- > 0) {
                struct dma_desc *p;
 
                if (priv->extend_desc)
-                       p = (struct dma_desc *)(priv->dma_erx + entry);
+                       p = (struct dma_desc *)(rx_q->dma_erx + entry);
                else
-                       p = priv->dma_rx + entry;
+                       p = rx_q->dma_rx + entry;
 
-               if (likely(priv->rx_skbuff[entry] == NULL)) {
+               if (likely(!rx_q->rx_skbuff[entry])) {
                        struct sk_buff *skb;
 
                        skb = netdev_alloc_skb_ip_align(priv->dev, bfsize);
                        if (unlikely(!skb)) {
                                /* so for a while no zero-copy! */
-                               priv->rx_zeroc_thresh = STMMAC_RX_THRESH;
+                               rx_q->rx_zeroc_thresh = STMMAC_RX_THRESH;
                                if (unlikely(net_ratelimit()))
                                        dev_err(priv->device,
                                                "fail to alloc skb entry %d\n",
@@ -2443,28 +3246,28 @@ static inline void stmmac_rx_refill(stru
                                break;
                        }
 
-                       priv->rx_skbuff[entry] = skb;
-                       priv->rx_skbuff_dma[entry] =
+                       rx_q->rx_skbuff[entry] = skb;
+                       rx_q->rx_skbuff_dma[entry] =
                            dma_map_single(priv->device, skb->data, bfsize,
                                           DMA_FROM_DEVICE);
                        if (dma_mapping_error(priv->device,
-                                             priv->rx_skbuff_dma[entry])) {
+                                             rx_q->rx_skbuff_dma[entry])) {
                                netdev_err(priv->dev, "Rx DMA map failed\n");
                                dev_kfree_skb(skb);
                                break;
                        }
 
                        if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
-                               p->des0 = cpu_to_le32(priv->rx_skbuff_dma[entry]);
+                               p->des0 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]);
                                p->des1 = 0;
                        } else {
-                               p->des2 = cpu_to_le32(priv->rx_skbuff_dma[entry]);
+                               p->des2 = cpu_to_le32(rx_q->rx_skbuff_dma[entry]);
                        }
                        if (priv->hw->mode->refill_desc3)
-                               priv->hw->mode->refill_desc3(priv, p);
+                               priv->hw->mode->refill_desc3(rx_q, p);
 
-                       if (priv->rx_zeroc_thresh > 0)
-                               priv->rx_zeroc_thresh--;
+                       if (rx_q->rx_zeroc_thresh > 0)
+                               rx_q->rx_zeroc_thresh--;
 
                        netif_dbg(priv, rx_status, priv->dev,
                                  "refill entry #%d\n", entry);
@@ -2480,31 +3283,33 @@ static inline void stmmac_rx_refill(stru
 
                entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE);
        }
-       priv->dirty_rx = entry;
+       rx_q->dirty_rx = entry;
 }
 
 /**
  * stmmac_rx - manage the receive process
  * @priv: driver private structure
- * @limit: napi bugget.
+ * @limit: napi budget
+ * @queue: RX queue index.
  * Description :  this the function called by the napi poll method.
  * It gets all the frames inside the ring.
  */
-static int stmmac_rx(struct stmmac_priv *priv, int limit)
+static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 {
-       unsigned int entry = priv->cur_rx;
+       struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+       unsigned int entry = rx_q->cur_rx;
+       int coe = priv->hw->rx_csum;
        unsigned int next_entry;
        unsigned int count = 0;
-       int coe = priv->hw->rx_csum;
 
        if (netif_msg_rx_status(priv)) {
                void *rx_head;
 
                netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
                if (priv->extend_desc)
-                       rx_head = (void *)priv->dma_erx;
+                       rx_head = (void *)rx_q->dma_erx;
                else
-                       rx_head = (void *)priv->dma_rx;
+                       rx_head = (void *)rx_q->dma_rx;
 
                priv->hw->desc->display_ring(rx_head, DMA_RX_SIZE, true);
        }
@@ -2514,9 +3319,9 @@ static int stmmac_rx(struct stmmac_priv
                struct dma_desc *np;
 
                if (priv->extend_desc)
-                       p = (struct dma_desc *)(priv->dma_erx + entry);
+                       p = (struct dma_desc *)(rx_q->dma_erx + entry);
                else
-                       p = priv->dma_rx + entry;
+                       p = rx_q->dma_rx + entry;
 
                /* read the status of the incoming frame */
                status = priv->hw->desc->rx_status(&priv->dev->stats,
@@ -2527,20 +3332,20 @@ static int stmmac_rx(struct stmmac_priv
 
                count++;
 
-               priv->cur_rx = STMMAC_GET_ENTRY(priv->cur_rx, DMA_RX_SIZE);
-               next_entry = priv->cur_rx;
+               rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, DMA_RX_SIZE);
+               next_entry = rx_q->cur_rx;
 
                if (priv->extend_desc)
-                       np = (struct dma_desc *)(priv->dma_erx + next_entry);
+                       np = (struct dma_desc *)(rx_q->dma_erx + next_entry);
                else
-                       np = priv->dma_rx + next_entry;
+                       np = rx_q->dma_rx + next_entry;
 
                prefetch(np);
 
                if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status))
                        priv->hw->desc->rx_extended_status(&priv->dev->stats,
                                                           &priv->xstats,
-                                                          priv->dma_erx +
+                                                          rx_q->dma_erx +
                                                           entry);
                if (unlikely(status == discard_frame)) {
                        priv->dev->stats.rx_errors++;
@@ -2550,9 +3355,9 @@ static int stmmac_rx(struct stmmac_priv
                                 * them in stmmac_rx_refill() function so that
                                 * device can reuse it.
                                 */
-                               priv->rx_skbuff[entry] = NULL;
+                               rx_q->rx_skbuff[entry] = NULL;
                                dma_unmap_single(priv->device,
-                                                priv->rx_skbuff_dma[entry],
+                                                rx_q->rx_skbuff_dma[entry],
                                                 priv->dma_buf_sz,
                                                 DMA_FROM_DEVICE);
                        }
@@ -2600,7 +3405,7 @@ static int stmmac_rx(struct stmmac_priv
                         */
                        if (unlikely(!priv->plat->has_gmac4 &&
                                     ((frame_len < priv->rx_copybreak) ||
-                                    stmmac_rx_threshold_count(priv)))) {
+                                    stmmac_rx_threshold_count(rx_q)))) {
                                skb = netdev_alloc_skb_ip_align(priv->dev,
                                                                frame_len);
                                if (unlikely(!skb)) {
@@ -2612,21 +3417,21 @@ static int stmmac_rx(struct stmmac_priv
                                }
 
                                dma_sync_single_for_cpu(priv->device,
-                                                       priv->rx_skbuff_dma
+                                                       rx_q->rx_skbuff_dma
                                                        [entry], frame_len,
                                                        DMA_FROM_DEVICE);
                                skb_copy_to_linear_data(skb,
-                                                       priv->
+                                                       rx_q->
                                                        rx_skbuff[entry]->data,
                                                        frame_len);
 
                                skb_put(skb, frame_len);
                                dma_sync_single_for_device(priv->device,
-                                                          priv->rx_skbuff_dma
+                                                          rx_q->rx_skbuff_dma
                                                           [entry], frame_len,
                                                           DMA_FROM_DEVICE);
                        } else {
-                               skb = priv->rx_skbuff[entry];
+                               skb = rx_q->rx_skbuff[entry];
                                if (unlikely(!skb)) {
                                        netdev_err(priv->dev,
                                                   "%s: Inconsistent Rx chain\n",
@@ -2635,12 +3440,12 @@ static int stmmac_rx(struct stmmac_priv
                                        break;
                                }
                                prefetch(skb->data - NET_IP_ALIGN);
-                               priv->rx_skbuff[entry] = NULL;
-                               priv->rx_zeroc_thresh++;
+                               rx_q->rx_skbuff[entry] = NULL;
+                               rx_q->rx_zeroc_thresh++;
 
                                skb_put(skb, frame_len);
                                dma_unmap_single(priv->device,
-                                                priv->rx_skbuff_dma[entry],
+                                                rx_q->rx_skbuff_dma[entry],
                                                 priv->dma_buf_sz,
                                                 DMA_FROM_DEVICE);
                        }
@@ -2662,7 +3467,7 @@ static int stmmac_rx(struct stmmac_priv
                        else
                                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-                       napi_gro_receive(&priv->napi, skb);
+                       napi_gro_receive(&rx_q->napi, skb);
 
                        priv->dev->stats.rx_packets++;
                        priv->dev->stats.rx_bytes += frame_len;
@@ -2670,7 +3475,7 @@ static int stmmac_rx(struct stmmac_priv
                entry = next_entry;
        }
 
-       stmmac_rx_refill(priv);
+       stmmac_rx_refill(priv, queue);
 
        priv->xstats.rx_pkt_n += count;
 
@@ -2687,16 +3492,24 @@ static int stmmac_rx(struct stmmac_priv
  */
 static int stmmac_poll(struct napi_struct *napi, int budget)
 {
-       struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi);
+       struct stmmac_rx_queue *rx_q =
+               container_of(napi, struct stmmac_rx_queue, napi);
+       struct stmmac_priv *priv = rx_q->priv_data;
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 chan = rx_q->queue_index;
        int work_done = 0;
+       u32 queue;
 
        priv->xstats.napi_poll++;
-       stmmac_tx_clean(priv);
 
-       work_done = stmmac_rx(priv, budget);
+       /* check all the queues */
+       for (queue = 0; queue < tx_count; queue++)
+               stmmac_tx_clean(priv, queue);
+
+       work_done = stmmac_rx(priv, budget, rx_q->queue_index);
        if (work_done < budget) {
                napi_complete_done(napi, work_done);
-               stmmac_enable_dma_irq(priv);
+               stmmac_enable_dma_irq(priv, chan);
        }
        return work_done;
 }
@@ -2712,9 +3525,12 @@ static int stmmac_poll(struct napi_struc
 static void stmmac_tx_timeout(struct net_device *dev)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 chan;
 
        /* Clear Tx resources and restart transmitting again */
-       stmmac_tx_err(priv);
+       for (chan = 0; chan < tx_count; chan++)
+               stmmac_tx_err(priv, chan);
 }
 
 /**
@@ -2837,6 +3653,12 @@ static irqreturn_t stmmac_interrupt(int
 {
        struct net_device *dev = (struct net_device *)dev_id;
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 queues_count;
+       u32 queue;
+
+       queues_count = (rx_cnt > tx_cnt) ? rx_cnt : tx_cnt;
 
        if (priv->irq_wake)
                pm_wakeup_event(priv->device, 0);
@@ -2850,16 +3672,30 @@ static irqreturn_t stmmac_interrupt(int
        if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {
                int status = priv->hw->mac->host_irq_status(priv->hw,
                                                            &priv->xstats);
+
                if (unlikely(status)) {
                        /* For LPI we need to save the tx status */
                        if (status & CORE_IRQ_TX_PATH_IN_LPI_MODE)
                                priv->tx_path_in_lpi_mode = true;
                        if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE)
                                priv->tx_path_in_lpi_mode = false;
-                       if (status & CORE_IRQ_MTL_RX_OVERFLOW && priv->hw->dma->set_rx_tail_ptr)
-                               priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
-                                                       priv->rx_tail_addr,
-                                                       STMMAC_CHAN0);
+               }
+
+               if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+                       for (queue = 0; queue < queues_count; queue++) {
+                               struct stmmac_rx_queue *rx_q =
+                               &priv->rx_queue[queue];
+
+                               status |=
+                               priv->hw->mac->host_mtl_irq_status(priv->hw,
+                                                                  queue);
+
+                               if (status & CORE_IRQ_MTL_RX_OVERFLOW &&
+                                   priv->hw->dma->set_rx_tail_ptr)
+                                       priv->hw->dma->set_rx_tail_ptr(priv->ioaddr,
+                                                               rx_q->rx_tail_addr,
+                                                               queue);
+                       }
                }
 
                /* PCS link status */
@@ -2944,7 +3780,7 @@ static void sysfs_display_ring(void *hea
                        ep++;
                } else {
                        seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-                                  i, (unsigned int)virt_to_phys(ep),
+                                  i, (unsigned int)virt_to_phys(p),
                                   le32_to_cpu(p->des0), le32_to_cpu(p->des1),
                                   le32_to_cpu(p->des2), le32_to_cpu(p->des3));
                        p++;
@@ -2957,17 +3793,40 @@ static int stmmac_sysfs_ring_read(struct
 {
        struct net_device *dev = seq->private;
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 rx_count = priv->plat->rx_queues_to_use;
+       u32 tx_count = priv->plat->tx_queues_to_use;
+       u32 queue;
 
-       if (priv->extend_desc) {
-               seq_printf(seq, "Extended RX descriptor ring:\n");
-               sysfs_display_ring((void *)priv->dma_erx, DMA_RX_SIZE, 1, seq);
-               seq_printf(seq, "Extended TX descriptor ring:\n");
-               sysfs_display_ring((void *)priv->dma_etx, DMA_TX_SIZE, 1, seq);
-       } else {
-               seq_printf(seq, "RX descriptor ring:\n");
-               sysfs_display_ring((void *)priv->dma_rx, DMA_RX_SIZE, 0, seq);
-               seq_printf(seq, "TX descriptor ring:\n");
-               sysfs_display_ring((void *)priv->dma_tx, DMA_TX_SIZE, 0, seq);
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               seq_printf(seq, "RX Queue %d:\n", queue);
+
+               if (priv->extend_desc) {
+                       seq_printf(seq, "Extended descriptor ring:\n");
+                       sysfs_display_ring((void *)rx_q->dma_erx,
+                                          DMA_RX_SIZE, 1, seq);
+               } else {
+                       seq_printf(seq, "Descriptor ring:\n");
+                       sysfs_display_ring((void *)rx_q->dma_rx,
+                                          DMA_RX_SIZE, 0, seq);
+               }
+       }
+
+       for (queue = 0; queue < tx_count; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               seq_printf(seq, "TX Queue %d:\n", queue);
+
+               if (priv->extend_desc) {
+                       seq_printf(seq, "Extended descriptor ring:\n");
+                       sysfs_display_ring((void *)tx_q->dma_etx,
+                                          DMA_TX_SIZE, 1, seq);
+               } else {
+                       seq_printf(seq, "Descriptor ring:\n");
+                       sysfs_display_ring((void *)tx_q->dma_tx,
+                                          DMA_TX_SIZE, 0, seq);
+               }
        }
 
        return 0;
@@ -3250,11 +4109,14 @@ int stmmac_dvr_probe(struct device *devi
                     struct plat_stmmacenet_data *plat_dat,
                     struct stmmac_resources *res)
 {
-       int ret = 0;
        struct net_device *ndev = NULL;
        struct stmmac_priv *priv;
+       int ret = 0;
+       u32 queue;
 
-       ndev = alloc_etherdev(sizeof(struct stmmac_priv));
+       ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv),
+                                 MTL_MAX_TX_QUEUES,
+                                 MTL_MAX_RX_QUEUES);
        if (!ndev)
                return -ENOMEM;
 
@@ -3296,6 +4158,10 @@ int stmmac_dvr_probe(struct device *devi
        if (ret)
                goto error_hw_init;
 
+       /* Configure real RX and TX queues */
+       netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use);
+       netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);
+
        ndev->netdev_ops = &stmmac_netdev_ops;
 
        ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -3328,7 +4194,12 @@ int stmmac_dvr_probe(struct device *devi
                         "Enable RX Mitigation via HW Watchdog Timer\n");
        }
 
-       netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
+       for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               netif_napi_add(ndev, &rx_q->napi, stmmac_poll,
+                              (8 * priv->plat->rx_queues_to_use));
+       }
 
        spin_lock_init(&priv->lock);
 
@@ -3373,7 +4244,11 @@ error_netdev_register:
            priv->hw->pcs != STMMAC_PCS_RTBI)
                stmmac_mdio_unregister(ndev);
 error_mdio_register:
-       netif_napi_del(&priv->napi);
+       for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               netif_napi_del(&rx_q->napi);
+       }
 error_hw_init:
        free_netdev(ndev);
 
@@ -3394,10 +4269,9 @@ int stmmac_dvr_remove(struct device *dev
 
        netdev_info(priv->dev, "%s: removing driver", __func__);
 
-       priv->hw->dma->stop_rx(priv->ioaddr);
-       priv->hw->dma->stop_tx(priv->ioaddr);
+       stmmac_stop_all_dma(priv);
 
-       stmmac_set_mac(priv->ioaddr, false);
+       priv->hw->mac->set_mac(priv->ioaddr, false);
        netif_carrier_off(ndev);
        unregister_netdev(ndev);
        if (priv->plat->stmmac_rst)
@@ -3436,20 +4310,19 @@ int stmmac_suspend(struct device *dev)
        spin_lock_irqsave(&priv->lock, flags);
 
        netif_device_detach(ndev);
-       netif_stop_queue(ndev);
+       stmmac_stop_all_queues(priv);
 
-       napi_disable(&priv->napi);
+       stmmac_disable_all_queues(priv);
 
        /* Stop TX/RX DMA */
-       priv->hw->dma->stop_tx(priv->ioaddr);
-       priv->hw->dma->stop_rx(priv->ioaddr);
+       stmmac_stop_all_dma(priv);
 
        /* Enable Power down mode by programming the PMT regs */
        if (device_may_wakeup(priv->device)) {
                priv->hw->mac->pmt(priv->hw, priv->wolopts);
                priv->irq_wake = 1;
        } else {
-               stmmac_set_mac(priv->ioaddr, false);
+               priv->hw->mac->set_mac(priv->ioaddr, false);
                pinctrl_pm_select_sleep_state(priv->device);
                /* Disable clock in case of PWM is off */
                clk_disable(priv->plat->pclk);
@@ -3465,6 +4338,31 @@ int stmmac_suspend(struct device *dev)
 EXPORT_SYMBOL_GPL(stmmac_suspend);
 
 /**
+ * stmmac_reset_queues_param - reset queue parameters
+ * @priv: driver private structure
+ */
+static void stmmac_reset_queues_param(struct stmmac_priv *priv)
+{
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < rx_cnt; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               rx_q->cur_rx = 0;
+               rx_q->dirty_rx = 0;
+       }
+
+       for (queue = 0; queue < tx_cnt; queue++) {
+               struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+               tx_q->cur_tx = 0;
+               tx_q->dirty_tx = 0;
+       }
+}
+
+/**
  * stmmac_resume - resume callback
  * @dev: device pointer
  * Description: when resume this function is invoked to setup the DMA and CORE
@@ -3504,10 +4402,8 @@ int stmmac_resume(struct device *dev)
 
        spin_lock_irqsave(&priv->lock, flags);
 
-       priv->cur_rx = 0;
-       priv->dirty_rx = 0;
-       priv->dirty_tx = 0;
-       priv->cur_tx = 0;
+       stmmac_reset_queues_param(priv);
+
        /* reset private mss value to force mss context settings at
         * next tso xmit (only used for gmac4).
         */
@@ -3519,9 +4415,9 @@ int stmmac_resume(struct device *dev)
        stmmac_init_tx_coalesce(priv);
        stmmac_set_rx_mode(ndev);
 
-       napi_enable(&priv->napi);
+       stmmac_enable_all_queues(priv);
 
-       netif_start_queue(ndev);
+       stmmac_start_all_queues(priv);
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -32,6 +32,7 @@
  */
 struct stmmac_pci_dmi_data {
        const char *name;
+       const char *asset_tag;
        unsigned int func;
        int phy_addr;
 };
@@ -46,6 +47,7 @@ struct stmmac_pci_info {
 static int stmmac_pci_find_phy_addr(struct stmmac_pci_info *info)
 {
        const char *name = dmi_get_system_info(DMI_BOARD_NAME);
+       const char *asset_tag = dmi_get_system_info(DMI_BOARD_ASSET_TAG);
        unsigned int func = PCI_FUNC(info->pdev->devfn);
        struct stmmac_pci_dmi_data *dmi;
 
@@ -57,18 +59,19 @@ static int stmmac_pci_find_phy_addr(stru
                return 1;
 
        for (dmi = info->dmi; dmi->name && *dmi->name; dmi++) {
-               if (!strcmp(dmi->name, name) && dmi->func == func)
+               if (!strcmp(dmi->name, name) && dmi->func == func) {
+                       /* A listed asset tag must match too; a board without one (NULL) never matches. */
+                       if (dmi->asset_tag && (!asset_tag || strcmp(dmi->asset_tag, asset_tag)))
+                               continue;
                        return dmi->phy_addr;
+               }
        }
 
        return -ENODEV;
 }
 
-static void stmmac_default_data(struct plat_stmmacenet_data *plat)
+static void common_default_data(struct plat_stmmacenet_data *plat)
 {
-       plat->bus_id = 1;
-       plat->phy_addr = 0;
-       plat->interface = PHY_INTERFACE_MODE_GMII;
        plat->clk_csr = 2;      /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
        plat->has_gmac = 1;
        plat->force_sf_dma_mode = 1;
@@ -76,10 +79,6 @@ static void stmmac_default_data(struct p
        plat->mdio_bus_data->phy_reset = NULL;
        plat->mdio_bus_data->phy_mask = 0;
 
-       plat->dma_cfg->pbl = 32;
-       plat->dma_cfg->pblx8 = true;
-       /* TODO: AXI */
-
        /* Set default value for multicast hash bins */
        plat->multicast_filter_bins = HASH_TABLE_SIZE;
 
@@ -88,6 +87,31 @@ static void stmmac_default_data(struct p
 
        /* Set the maxmtu to a default of JUMBO_LEN */
        plat->maxmtu = JUMBO_LEN;
+
+       /* Set default number of RX and TX queues to use */
+       plat->tx_queues_to_use = 1;
+       plat->rx_queues_to_use = 1;
+
+       /* Disable Priority config by default */
+       plat->tx_queues_cfg[0].use_prio = false;
+       plat->rx_queues_cfg[0].use_prio = false;
+
+       /* Disable RX queues routing by default */
+       plat->rx_queues_cfg[0].pkt_route = 0x0;
+}
+
+static void stmmac_default_data(struct plat_stmmacenet_data *plat)
+{
+       /* Set common default data first */
+       common_default_data(plat);
+
+       plat->bus_id = 1;
+       plat->phy_addr = 0;
+       plat->interface = PHY_INTERFACE_MODE_GMII;
+
+       plat->dma_cfg->pbl = 32;
+       plat->dma_cfg->pblx8 = true;
+       /* TODO: AXI */
 }
 
 static int quark_default_data(struct plat_stmmacenet_data *plat,
@@ -96,6 +120,9 @@ static int quark_default_data(struct pla
        struct pci_dev *pdev = info->pdev;
        int ret;
 
+       /* Set common default data first */
+       common_default_data(plat);
+
        /*
         * Refuse to load the driver and register net device if MAC controller
         * does not connect to any PHY interface.
@@ -107,27 +134,12 @@ static int quark_default_data(struct pla
        plat->bus_id = PCI_DEVID(pdev->bus->number, pdev->devfn);
        plat->phy_addr = ret;
        plat->interface = PHY_INTERFACE_MODE_RMII;
-       plat->clk_csr = 2;
-       plat->has_gmac = 1;
-       plat->force_sf_dma_mode = 1;
-
-       plat->mdio_bus_data->phy_reset = NULL;
-       plat->mdio_bus_data->phy_mask = 0;
 
        plat->dma_cfg->pbl = 16;
        plat->dma_cfg->pblx8 = true;
        plat->dma_cfg->fixed_burst = 1;
        /* AXI (TODO) */
 
-       /* Set default value for multicast hash bins */
-       plat->multicast_filter_bins = HASH_TABLE_SIZE;
-
-       /* Set default value for unicast filter entries */
-       plat->unicast_filter_entries = 1;
-
-       /* Set the maxmtu to a default of JUMBO_LEN */
-       plat->maxmtu = JUMBO_LEN;
-
        return 0;
 }
 
@@ -142,6 +154,24 @@ static struct stmmac_pci_dmi_data quark_
                .func = 6,
                .phy_addr = 1,
        },
+       {
+               .name = "SIMATIC IOT2000",
+               .asset_tag = "6ES7647-0AA00-0YA2",
+               .func = 6,
+               .phy_addr = 1,
+       },
+       {
+               .name = "SIMATIC IOT2000",
+               .asset_tag = "6ES7647-0AA00-1YA2",
+               .func = 6,
+               .phy_addr = 1,
+       },
+       {
+               .name = "SIMATIC IOT2000",
+               .asset_tag = "6ES7647-0AA00-1YA2",
+               .func = 7,
+               .phy_addr = 1,
+       },
        {}
 };
 
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -108,7 +108,7 @@ static struct stmmac_axi *stmmac_axi_set
        if (!np)
                return NULL;
 
-       axi = kzalloc(sizeof(*axi), GFP_KERNEL);
+       axi = devm_kzalloc(&pdev->dev, sizeof(*axi), GFP_KERNEL);
        if (!axi) {
                of_node_put(np);
                return ERR_PTR(-ENOMEM);
@@ -132,6 +132,181 @@ static struct stmmac_axi *stmmac_axi_set
 }
 
 /**
+ * stmmac_mtl_setup - parse DT parameters for multiple queues configuration
+ * @pdev: platform device
+ * @plat: platform data that receives the RX/TX queue configuration
+ *
+ * Description: reads the nodes referenced by the "snps,mtl-rx-config" and
+ * "snps,mtl-tx-config" phandles and stores the per-queue settings in @plat.
+ * When either phandle is missing, one RX queue and one TX queue are used.
+ * Queue counts above MTL_MAX_RX_QUEUES / MTL_MAX_TX_QUEUES are clamped so
+ * that the per-queue configuration arrays are never indexed out of bounds.
+ */
+static void stmmac_mtl_setup(struct platform_device *pdev,
+                            struct plat_stmmacenet_data *plat)
+{
+       struct device_node *q_node;
+       struct device_node *rx_node;
+       struct device_node *tx_node;
+       u8 queue = 0;
+
+       /* For backwards-compatibility with device trees that don't have any
+        * snps,mtl-rx-config or snps,mtl-tx-config properties, we fall back
+        * to one RX queue and one TX queue.
+        */
+       plat->rx_queues_to_use = 1;
+       plat->tx_queues_to_use = 1;
+
+       rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0);
+       if (!rx_node)
+               return;
+
+       tx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-tx-config", 0);
+       if (!tx_node) {
+               of_node_put(rx_node);
+               return;
+       }
+
+       /* Processing RX queues common config */
+       if (of_property_read_u8(rx_node, "snps,rx-queues-to-use",
+                               &plat->rx_queues_to_use))
+               plat->rx_queues_to_use = 1;
+
+       /* rx_queues_cfg[] only has MTL_MAX_RX_QUEUES entries */
+       if (plat->rx_queues_to_use > MTL_MAX_RX_QUEUES) {
+               dev_warn(&pdev->dev, "RX queues limited to %d\n",
+                        MTL_MAX_RX_QUEUES);
+               plat->rx_queues_to_use = MTL_MAX_RX_QUEUES;
+       }
+
+       if (of_property_read_bool(rx_node, "snps,rx-sched-sp"))
+               plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP;
+       else if (of_property_read_bool(rx_node, "snps,rx-sched-wsp"))
+               plat->rx_sched_algorithm = MTL_RX_ALGORITHM_WSP;
+       else
+               plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP;
+
+       /* Processing individual RX queue config */
+       for_each_child_of_node(rx_node, q_node) {
+               if (queue >= plat->rx_queues_to_use) {
+                       /* Early exit: drop the reference held on q_node */
+                       of_node_put(q_node);
+                       break;
+               }
+
+               if (of_property_read_bool(q_node, "snps,dcb-algorithm"))
+                       plat->rx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
+               else if (of_property_read_bool(q_node, "snps,avb-algorithm"))
+                       plat->rx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB;
+               else
+                       plat->rx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
+
+               if (of_property_read_u8(q_node, "snps,map-to-dma-channel",
+                                       &plat->rx_queues_cfg[queue].chan))
+                       plat->rx_queues_cfg[queue].chan = queue;
+               /* TODO: Dynamic mapping to be included in the future */
+
+               if (of_property_read_u32(q_node, "snps,priority",
+                                       &plat->rx_queues_cfg[queue].prio)) {
+                       plat->rx_queues_cfg[queue].prio = 0;
+                       plat->rx_queues_cfg[queue].use_prio = false;
+               } else {
+                       plat->rx_queues_cfg[queue].use_prio = true;
+               }
+
+               /* RX queue specific packet type routing */
+               if (of_property_read_bool(q_node, "snps,route-avcp"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_AVCPQ;
+               else if (of_property_read_bool(q_node, "snps,route-ptp"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_PTPQ;
+               else if (of_property_read_bool(q_node, "snps,route-dcbcp"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_DCBCPQ;
+               else if (of_property_read_bool(q_node, "snps,route-up"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_UPQ;
+               else if (of_property_read_bool(q_node, "snps,route-multi-broad"))
+                       plat->rx_queues_cfg[queue].pkt_route = PACKET_MCBCQ;
+               else
+                       plat->rx_queues_cfg[queue].pkt_route = 0x0;
+
+               queue++;
+       }
+
+       /* Processing TX queues common config */
+       if (of_property_read_u8(tx_node, "snps,tx-queues-to-use",
+                               &plat->tx_queues_to_use))
+               plat->tx_queues_to_use = 1;
+
+       /* tx_queues_cfg[] only has MTL_MAX_TX_QUEUES entries */
+       if (plat->tx_queues_to_use > MTL_MAX_TX_QUEUES) {
+               dev_warn(&pdev->dev, "TX queues limited to %d\n",
+                        MTL_MAX_TX_QUEUES);
+               plat->tx_queues_to_use = MTL_MAX_TX_QUEUES;
+       }
+
+       if (of_property_read_bool(tx_node, "snps,tx-sched-wrr"))
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WRR;
+       else if (of_property_read_bool(tx_node, "snps,tx-sched-wfq"))
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WFQ;
+       else if (of_property_read_bool(tx_node, "snps,tx-sched-dwrr"))
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_DWRR;
+       else if (of_property_read_bool(tx_node, "snps,tx-sched-sp"))
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
+       else
+               plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
+
+       queue = 0;
+
+       /* Processing individual TX queue config */
+       for_each_child_of_node(tx_node, q_node) {
+               if (queue >= plat->tx_queues_to_use) {
+                       /* Early exit: drop the reference held on q_node */
+                       of_node_put(q_node);
+                       break;
+               }
+
+               if (of_property_read_u8(q_node, "snps,weight",
+                                       &plat->tx_queues_cfg[queue].weight))
+                       plat->tx_queues_cfg[queue].weight = 0x10 + queue;
+
+               if (of_property_read_bool(q_node, "snps,dcb-algorithm")) {
+                       plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
+               } else if (of_property_read_bool(q_node,
+                                                "snps,avb-algorithm")) {
+                       plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB;
+
+                       /* Credit Based Shaper parameters used by AVB */
+                       if (of_property_read_u32(q_node, "snps,send_slope",
+                               &plat->tx_queues_cfg[queue].send_slope))
+                               plat->tx_queues_cfg[queue].send_slope = 0x0;
+                       if (of_property_read_u32(q_node, "snps,idle_slope",
+                               &plat->tx_queues_cfg[queue].idle_slope))
+                               plat->tx_queues_cfg[queue].idle_slope = 0x0;
+                       if (of_property_read_u32(q_node, "snps,high_credit",
+                               &plat->tx_queues_cfg[queue].high_credit))
+                               plat->tx_queues_cfg[queue].high_credit = 0x0;
+                       if (of_property_read_u32(q_node, "snps,low_credit",
+                               &plat->tx_queues_cfg[queue].low_credit))
+                               plat->tx_queues_cfg[queue].low_credit = 0x0;
+               } else {
+                       plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
+               }
+
+               if (of_property_read_u32(q_node, "snps,priority",
+                                       &plat->tx_queues_cfg[queue].prio)) {
+                       plat->tx_queues_cfg[queue].prio = 0;
+                       plat->tx_queues_cfg[queue].use_prio = false;
+               } else {
+                       plat->tx_queues_cfg[queue].use_prio = true;
+               }
+
+               queue++;
+       }
+
+       of_node_put(rx_node);
+       of_node_put(tx_node);
+}
+
+/**
  * stmmac_dt_phy - parse device-tree driver parameters to allocate PHY resources
  * @plat: driver data platform structure
  * @np: device tree node
@@ -340,6 +489,8 @@ stmmac_probe_config_dt(struct platform_d
 
        plat->axi = stmmac_axi_setup(pdev);
 
+       stmmac_mtl_setup(pdev, plat);
+
        /* clock setup */
        plat->stmmac_clk = devm_clk_get(&pdev->dev,
                                        STMMAC_RESOURCE_NAME);
@@ -359,13 +510,12 @@ stmmac_probe_config_dt(struct platform_d
        clk_prepare_enable(plat->pclk);
 
        /* Fall-back to main clock in case of no PTP ref is passed */
-       plat->clk_ptp_ref = devm_clk_get(&pdev->dev, "clk_ptp_ref");
+       plat->clk_ptp_ref = devm_clk_get(&pdev->dev, "ptp_ref");
        if (IS_ERR(plat->clk_ptp_ref)) {
                plat->clk_ptp_rate = clk_get_rate(plat->stmmac_clk);
                plat->clk_ptp_ref = NULL;
                dev_warn(&pdev->dev, "PTP uses main clock\n");
        } else {
-               clk_prepare_enable(plat->clk_ptp_ref);
                plat->clk_ptp_rate = clk_get_rate(plat->clk_ptp_ref);
                dev_dbg(&pdev->dev, "PTP rate %d\n", plat->clk_ptp_rate);
        }
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -28,6 +28,9 @@
 
 #include <linux/platform_device.h>
 
+#define MTL_MAX_RX_QUEUES      8       /* entries in rx_queues_cfg[] */
+#define MTL_MAX_TX_QUEUES      8       /* entries in tx_queues_cfg[] */
+
 #define STMMAC_RX_COE_NONE     0
 #define STMMAC_RX_COE_TYPE1    1
 #define STMMAC_RX_COE_TYPE2    2
@@ -44,6 +47,18 @@
 #define        STMMAC_CSR_150_250M     0x4     /* MDC = clk_scr_i/102 */
 #define        STMMAC_CSR_250_300M     0x5     /* MDC = clk_scr_i/122 */
 
+/* MTL scheduling algorithm identifiers and the DT property selecting each */
+#define MTL_TX_ALGORITHM_WRR   0x0     /* "snps,tx-sched-wrr" */
+#define MTL_TX_ALGORITHM_WFQ   0x1     /* "snps,tx-sched-wfq" */
+#define MTL_TX_ALGORITHM_DWRR  0x2     /* "snps,tx-sched-dwrr" */
+#define MTL_TX_ALGORITHM_SP    0x3     /* "snps,tx-sched-sp"; TX default */
+#define MTL_RX_ALGORITHM_SP    0x4     /* "snps,rx-sched-sp"; RX default */
+#define MTL_RX_ALGORITHM_WSP   0x5     /* "snps,rx-sched-wsp" */
+
+/* RX/TX queue mode, stored in the mode_to_use field of each queue config */
+#define MTL_QUEUE_AVB          0x0     /* "snps,avb-algorithm" */
+#define MTL_QUEUE_DCB          0x1     /* "snps,dcb-algorithm"; default */
+
 /* The MDC clock could be set higher than the IEEE 802.3
  * specified frequency limit 0f 2.5 MHz, by programming a clock divider
  * of value different than the above defined values. The resultant MDIO
@@ -109,6 +124,26 @@ struct stmmac_axi {
        bool axi_rb;
 };
 
+struct stmmac_rxq_cfg {
+       u8 mode_to_use;         /* MTL_QUEUE_DCB or MTL_QUEUE_AVB */
+       u8 chan;                /* "snps,map-to-dma-channel", else queue index */
+       u8 pkt_route;           /* PACKET_* from "snps,route-*", 0x0 if none */
+       bool use_prio;          /* set when "snps,priority" is present */
+       u32 prio;               /* "snps,priority" value, 0 when absent */
+};
+
+struct stmmac_txq_cfg {
+       u8 weight;              /* "snps,weight", else 0x10 + queue index */
+       u8 mode_to_use;         /* MTL_QUEUE_DCB or MTL_QUEUE_AVB */
+       /* Credit Based Shaper parameters, read from DT for AVB queues only */
+       u32 send_slope;
+       u32 idle_slope;
+       u32 high_credit;
+       u32 low_credit;
+       bool use_prio;          /* set when "snps,priority" is present */
+       u32 prio;               /* "snps,priority" value, 0 when absent */
+};
+
 struct plat_stmmacenet_data {
        int bus_id;
        int phy_addr;
@@ -133,6 +168,12 @@ struct plat_stmmacenet_data {
        int unicast_filter_entries;
        int tx_fifo_size;
        int rx_fifo_size;
+       u8 rx_queues_to_use;
+       u8 tx_queues_to_use;
+       u8 rx_sched_algorithm;
+       u8 tx_sched_algorithm;
+       struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES];
+       struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES];
        void (*fix_mac_speed)(void *priv, unsigned int speed);
        int (*init)(struct platform_device *pdev, void *priv);
        void (*exit)(struct platform_device *pdev, void *priv);