OpenWrt – Rev 1

Subversion Repositories:
Rev:
From: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Date: Fri, 7 Jun 2013 18:35:22 -0500
Subject: MIPS: r4k_cache: use more efficient cache blast

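Optimize the compiler output for the larger cache blast ranges that are
common in DMA-based networking: compute the number of cache lines up
front and unroll the loop to issue eight cache ops per iteration, with
the remaining lines handled in groups of four, two and one.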

Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -665,16 +665,48 @@ static inline void prot##extra##blast_##
                                                    unsigned long end)  \
 {                                                                      \
        unsigned long lsize = cpu_##desc##_line_size();                 \
+       unsigned long lsize_2 = lsize * 2;                              \
+       unsigned long lsize_3 = lsize * 3;                              \
+       unsigned long lsize_4 = lsize * 4;                              \
+       unsigned long lsize_5 = lsize * 5;                              \
+       unsigned long lsize_6 = lsize * 6;                              \
+       unsigned long lsize_7 = lsize * 7;                              \
+       unsigned long lsize_8 = lsize * 8;                              \
        unsigned long addr = start & ~(lsize - 1);                      \
-       unsigned long aend = (end - 1) & ~(lsize - 1);                  \
+       unsigned long aend = (end + lsize - 1) & ~(lsize - 1);          \
+       int lines = (aend - addr) / lsize;                              \
                                                                        \
        __##pfx##flush_prologue                                         \
                                                                        \
-       while (1) {                                                     \
+       while (lines >= 8) {                                            \
+               prot##cache_op(hitop, addr);                            \
+               prot##cache_op(hitop, addr + lsize);                    \
+               prot##cache_op(hitop, addr + lsize_2);                  \
+               prot##cache_op(hitop, addr + lsize_3);                  \
+               prot##cache_op(hitop, addr + lsize_4);                  \
+               prot##cache_op(hitop, addr + lsize_5);                  \
+               prot##cache_op(hitop, addr + lsize_6);                  \
+               prot##cache_op(hitop, addr + lsize_7);                  \
+               addr += lsize_8;                                        \
+               lines -= 8;                                             \
+       }                                                               \
+                                                                       \
+       if (lines & 0x4) {                                              \
+               prot##cache_op(hitop, addr);                            \
+               prot##cache_op(hitop, addr + lsize);                    \
+               prot##cache_op(hitop, addr + lsize_2);                  \
+               prot##cache_op(hitop, addr + lsize_3);                  \
+               addr += lsize_4;                                        \
+       }                                                               \
+                                                                       \
+       if (lines & 0x2) {                                              \
+               prot##cache_op(hitop, addr);                            \
+               prot##cache_op(hitop, addr + lsize);                    \
+               addr += lsize_2;                                        \
+       }                                                               \
+                                                                       \
+       if (lines & 0x1) {                                              \
                prot##cache_op(hitop, addr);                            \
-               if (addr == aend)                                       \
-                       break;                                          \
-               addr += lsize;                                          \
        }                                                               \
                                                                        \
        __##pfx##flush_epilogue                                         \