OpenWrt – Blame information for rev 2
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | From: Felix Fietkau <nbd@nbd.name> |
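2 | Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules; to achieve this, try to load kernel modules into KSEG0 and, if that fails, fall back to vmalloc and fix up relocations with a jump table |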
||
3 | |||
4 | lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c |
||
5 | Signed-off-by: Felix Fietkau <nbd@nbd.name> |
||
6 | --- |
||
7 | arch/mips/Makefile | 5 + |
||
8 | arch/mips/include/asm/module.h | 5 + |
||
9 | arch/mips/kernel/module.c | 279 ++++++++++++++++++++++++++++++++++++++++- |
||
10 | 3 files changed, 284 insertions(+), 5 deletions(-) |
||
11 | |||
12 | --- a/arch/mips/Makefile |
||
13 | +++ b/arch/mips/Makefile |
||
14 | @@ -93,8 +93,18 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin |
||
15 | cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely |
||
16 | cflags-y += -msoft-float |
||
17 | LDFLAGS_vmlinux += -G 0 -static -n -nostdlib |
||
18 | +ifdef CONFIG_64BIT |
||
19 | KBUILD_AFLAGS_MODULE += -mlong-calls |
||
20 | KBUILD_CFLAGS_MODULE += -mlong-calls |
||
21 | +else |
||
22 | + ifdef CONFIG_DYNAMIC_FTRACE |
||
23 | + KBUILD_AFLAGS_MODULE += -mlong-calls |
||
24 | + KBUILD_CFLAGS_MODULE += -mlong-calls |
||
25 | + else |
||
26 | + KBUILD_AFLAGS_MODULE += -mno-long-calls |
||
27 | + KBUILD_CFLAGS_MODULE += -mno-long-calls |
||
28 | + endif |
||
29 | +endif |
||
30 | |||
31 | ifeq ($(CONFIG_RELOCATABLE),y) |
||
32 | LDFLAGS_vmlinux += --emit-relocs |
||
33 | --- a/arch/mips/include/asm/module.h |
||
34 | +++ b/arch/mips/include/asm/module.h |
||
35 | @@ -11,6 +11,11 @@ struct mod_arch_specific { |
||
36 | const struct exception_table_entry *dbe_start; |
||
37 | const struct exception_table_entry *dbe_end; |
||
38 | struct mips_hi16 *r_mips_hi16_list; |
||
39 | + |
||
40 | + void *phys_plt_tbl; |
||
41 | + void *virt_plt_tbl; |
||
42 | + unsigned int phys_plt_offset; |
||
43 | + unsigned int virt_plt_offset; |
||
44 | }; |
||
45 | |||
46 | typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */ |
||
47 | --- a/arch/mips/kernel/module.c |
||
48 | +++ b/arch/mips/kernel/module.c |
||
49 | @@ -44,14 +44,221 @@ struct mips_hi16 { |
||
50 | static LIST_HEAD(dbe_list); |
||
51 | static DEFINE_SPINLOCK(dbe_lock); |
||
52 | |||
53 | -#ifdef MODULE_START |
||
54 | +/* |
||
55 | + * Get the maximum potential trampoline size required for the init or |
||
56 | + * non-init sections. Only used if we cannot find enough contiguous |
||
57 | + * physically mapped memory to put the module into. |
||
58 | + */ |
||
59 | +static unsigned int |
||
60 | +get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, |
||
61 | + const char *secstrings, unsigned int symindex, bool is_init) |
||
62 | +{ |
||
63 | + unsigned long ret = 0; |
||
64 | + unsigned int i, j; |
||
65 | + Elf_Sym *syms; |
||
66 | + |
||
67 | + /* Everything marked ALLOC (this includes the exported symbols) */ |
||
68 | + for (i = 1; i < hdr->e_shnum; ++i) { |
||
69 | + unsigned int info = sechdrs[i].sh_info; |
||
70 | + |
||
71 | + if (sechdrs[i].sh_type != SHT_REL |
||
72 | + && sechdrs[i].sh_type != SHT_RELA) |
||
73 | + continue; |
||
74 | + |
||
75 | + /* Not a valid relocation section? */ |
||
76 | + if (info >= hdr->e_shnum) |
||
77 | + continue; |
||
78 | + |
||
79 | + /* Don't bother with non-allocated sections */ |
||
80 | + if (!(sechdrs[info].sh_flags & SHF_ALLOC)) |
||
81 | + continue; |
||
82 | + |
||
83 | + /* Skip sections whose ".init" naming does not match |
||
84 | + is_init (init pass vs. non-init pass) */ |
||
85 | + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) |
||
86 | + != is_init) |
||
87 | + continue; |
||
88 | + |
||
89 | + syms = (Elf_Sym *) sechdrs[symindex].sh_addr; |
||
90 | + if (sechdrs[i].sh_type == SHT_REL) { |
||
91 | + Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr; |
||
92 | + unsigned int size = sechdrs[i].sh_size / sizeof(*rel); |
||
93 | + |
||
94 | + for (j = 0; j < size; ++j) { |
||
95 | + Elf_Sym *sym; |
||
96 | + |
||
97 | + if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26) |
||
98 | + continue; |
||
99 | + |
||
100 | + sym = syms + ELF_MIPS_R_SYM(rel[j]); |
||
101 | + if (!is_init && sym->st_shndx != SHN_UNDEF) |
||
102 | + continue; |
||
103 | + |
||
104 | + ret += 4 * sizeof(int); |
||
105 | + } |
||
106 | + } else { |
||
107 | + Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr; |
||
108 | + unsigned int size = sechdrs[i].sh_size / sizeof(*rela); |
||
109 | + |
||
110 | + for (j = 0; j < size; ++j) { |
||
111 | + Elf_Sym *sym; |
||
112 | + |
||
113 | + if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26) |
||
114 | + continue; |
||
115 | + |
||
116 | + sym = syms + ELF_MIPS_R_SYM(rela[j]); |
||
117 | + if (!is_init && sym->st_shndx != SHN_UNDEF) |
||
118 | + continue; |
||
119 | + |
||
120 | + ret += 4 * sizeof(int); |
||
121 | + } |
||
122 | + } |
||
123 | + } |
||
124 | + |
||
125 | + return ret; |
||
126 | +} |
||
127 | + |
||
128 | +#ifndef MODULE_START |
||
129 | +static void *alloc_phys(unsigned long size) |
||
130 | +{ |
||
131 | + unsigned order; |
||
132 | + struct page *page; |
||
133 | + struct page *p; |
||
134 | + |
||
135 | + size = PAGE_ALIGN(size); |
||
136 | + order = get_order(size); |
||
137 | + |
||
138 | + page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN | |
||
139 | + __GFP_THISNODE, order); |
||
140 | + if (!page) |
||
141 | + return NULL; |
||
142 | + |
||
143 | + split_page(page, order); |
||
144 | + |
||
145 | + /* mark all pages except for the last one */ |
||
146 | + for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p) |
||
147 | + set_bit(PG_owner_priv_1, &p->flags); |
||
148 | + |
||
149 | + for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p) |
||
150 | + __free_page(p); |
||
151 | + |
||
152 | + return page_address(page); |
||
153 | +} |
||
154 | +#endif |
||
155 | + |
||
156 | +static void free_phys(void *ptr) |
||
157 | +{ |
||
158 | + struct page *page; |
||
159 | + bool free; |
||
160 | + |
||
161 | + page = virt_to_page(ptr); |
||
162 | + do { |
||
163 | + free = test_and_clear_bit(PG_owner_priv_1, &page->flags); |
||
164 | + __free_page(page); |
||
165 | + page++; |
||
166 | + } while (free); |
||
167 | +} |
||
168 | + |
||
169 | + |
||
170 | void *module_alloc(unsigned long size) |
||
171 | { |
||
172 | +#ifdef MODULE_START |
||
173 | return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END, |
||
174 | GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, |
||
175 | __builtin_return_address(0)); |
||
176 | +#else |
||
177 | + void *ptr; |
||
178 | + |
||
179 | + if (size == 0) |
||
180 | + return NULL; |
||
181 | + |
||
182 | + ptr = alloc_phys(size); |
||
183 | + |
||
184 | + /* If we failed to allocate physically contiguous memory, |
||
185 | + * fall back to regular vmalloc. The module loader code will |
||
186 | + * create jump tables to handle long jumps */ |
||
187 | + if (!ptr) |
||
188 | + return vmalloc(size); |
||
189 | + |
||
190 | + return ptr; |
||
191 | +#endif |
||
192 | } |
||
193 | + |
||
194 | +static inline bool is_phys_addr(void *ptr) |
||
195 | +{ |
||
196 | +#ifdef CONFIG_64BIT |
||
197 | + return (KSEGX((unsigned long)ptr) == CKSEG0); |
||
198 | +#else |
||
199 | + return (KSEGX(ptr) == KSEG0); |
||
200 | #endif |
||
201 | +} |
||
202 | + |
||
203 | +/* Free memory returned from module_alloc */ |
||
204 | +void module_memfree(void *module_region) |
||
205 | +{ |
||
206 | + if (is_phys_addr(module_region)) |
||
207 | + free_phys(module_region); |
||
208 | + else |
||
209 | + vfree(module_region); |
||
210 | +} |
||
211 | + |
||
212 | +static void *__module_alloc(int size, bool phys) |
||
213 | +{ |
||
214 | + void *ptr; |
||
215 | + |
||
216 | + if (phys) |
||
217 | + ptr = kmalloc(size, GFP_KERNEL); |
||
218 | + else |
||
219 | + ptr = vmalloc(size); |
||
220 | + return ptr; |
||
221 | +} |
||
222 | + |
||
223 | +static void __module_free(void *ptr) |
||
224 | +{ |
||
225 | + if (is_phys_addr(ptr)) |
||
226 | + kfree(ptr); |
||
227 | + else |
||
228 | + vfree(ptr); |
||
229 | +} |
||
230 | + |
||
231 | +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, |
||
232 | + char *secstrings, struct module *mod) |
||
233 | +{ |
||
234 | + unsigned int symindex = 0; |
||
235 | + unsigned int core_size, init_size; |
||
236 | + int i; |
||
237 | + |
||
238 | + mod->arch.phys_plt_offset = 0; |
||
239 | + mod->arch.virt_plt_offset = 0; |
||
240 | + mod->arch.phys_plt_tbl = NULL; |
||
241 | + mod->arch.virt_plt_tbl = NULL; |
||
242 | + |
||
243 | + if (IS_ENABLED(CONFIG_64BIT)) |
||
244 | + return 0; |
||
245 | + |
||
246 | + for (i = 1; i < hdr->e_shnum; i++) |
||
247 | + if (sechdrs[i].sh_type == SHT_SYMTAB) |
||
248 | + symindex = i; |
||
249 | + |
||
250 | + core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false); |
||
251 | + init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true); |
||
252 | + |
||
253 | + if ((core_size + init_size) == 0) |
||
254 | + return 0; |
||
255 | + |
||
256 | + mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1); |
||
257 | + if (!mod->arch.phys_plt_tbl) |
||
258 | + return -ENOMEM; |
||
259 | + |
||
260 | + mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0); |
||
261 | + if (!mod->arch.virt_plt_tbl) { |
||
262 | + __module_free(mod->arch.phys_plt_tbl); |
||
263 | + mod->arch.phys_plt_tbl = NULL; |
||
264 | + return -ENOMEM; |
||
265 | + } |
||
266 | + |
||
267 | + return 0; |
||
268 | +} |
||
269 | |||
270 | int apply_r_mips_none(struct module *me, u32 *location, Elf_Addr v) |
||
271 | { |
||
272 | @@ -65,8 +272,39 @@ static int apply_r_mips_32_rel(struct mo |
||
273 | return 0; |
||
274 | } |
||
275 | |||
276 | +static Elf_Addr add_plt_entry_to(unsigned *plt_offset, |
||
277 | + void *start, Elf_Addr v) |
||
278 | +{ |
||
279 | + unsigned *tramp = start + *plt_offset; |
||
280 | + *plt_offset += 4 * sizeof(int); |
||
281 | + |
||
282 | + /* adjust carry for addiu */ |
||
283 | + if (v & 0x00008000) |
||
284 | + v += 0x10000; |
||
285 | + |
||
286 | + tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */ |
||
287 | + tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */ |
||
288 | + tramp[2] = 0x03200008; /* jr t9 */ |
||
289 | + tramp[3] = 0x00000000; /* nop */ |
||
290 | + |
||
291 | + return (Elf_Addr) tramp; |
||
292 | +} |
||
293 | + |
||
294 | +static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v) |
||
295 | +{ |
||
296 | + if (is_phys_addr(location)) |
||
297 | + return add_plt_entry_to(&me->arch.phys_plt_offset, |
||
298 | + me->arch.phys_plt_tbl, v); |
||
299 | + else |
||
300 | + return add_plt_entry_to(&me->arch.virt_plt_offset, |
||
301 | + me->arch.virt_plt_tbl, v); |
||
302 | + |
||
303 | +} |
||
304 | + |
||
305 | static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v) |
||
306 | { |
||
307 | + u32 ofs = *location & 0x03ffffff; |
||
308 | + |
||
309 | if (v % 4) { |
||
310 | pr_err("module %s: dangerous R_MIPS_26 REL relocation\n", |
||
311 | me->name); |
||
312 | @@ -74,13 +312,17 @@ static int apply_r_mips_26_rel(struct mo |
||
313 | } |
||
314 | |||
315 | if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) { |
||
316 | - pr_err("module %s: relocation overflow\n", |
||
317 | - me->name); |
||
318 | - return -ENOEXEC; |
||
319 | + v = add_plt_entry(me, location, v + (ofs << 2)); |
||
320 | + if (!v) { |
||
321 | + pr_err("module %s: relocation overflow\n", |
||
322 | + me->name); |
||
323 | + return -ENOEXEC; |
||
324 | + } |
||
325 | + ofs = 0; |
||
326 | } |
||
327 | |||
328 | *location = (*location & ~0x03ffffff) | |
||
329 | - ((*location + (v >> 2)) & 0x03ffffff); |
||
330 | + ((ofs + (v >> 2)) & 0x03ffffff); |
||
331 | |||
332 | return 0; |
||
333 | } |
||
334 | @@ -349,9 +591,36 @@ int module_finalize(const Elf_Ehdr *hdr, |
||
335 | list_add(&me->arch.dbe_list, &dbe_list); |
||
336 | spin_unlock_irq(&dbe_lock); |
||
337 | } |
||
338 | + |
||
339 | + /* Free each fixup trampoline table that ended up unused, e.g. when |
||
340 | + * the module runs from physically mapped address space */ |
||
341 | + if (me->arch.phys_plt_offset == 0) { |
||
342 | + __module_free(me->arch.phys_plt_tbl); |
||
343 | + me->arch.phys_plt_tbl = NULL; |
||
344 | + } |
||
345 | + if (me->arch.virt_plt_offset == 0) { |
||
346 | + __module_free(me->arch.virt_plt_tbl); |
||
347 | + me->arch.virt_plt_tbl = NULL; |
||
348 | + } |
||
349 | + |
||
350 | return 0; |
||
351 | } |
||
352 | |||
353 | +void module_arch_freeing_init(struct module *mod) |
||
354 | +{ |
||
355 | + if (mod->state == MODULE_STATE_LIVE) |
||
356 | + return; |
||
357 | + |
||
358 | + if (mod->arch.phys_plt_tbl) { |
||
359 | + __module_free(mod->arch.phys_plt_tbl); |
||
360 | + mod->arch.phys_plt_tbl = NULL; |
||
361 | + } |
||
362 | + if (mod->arch.virt_plt_tbl) { |
||
363 | + __module_free(mod->arch.virt_plt_tbl); |
||
364 | + mod->arch.virt_plt_tbl = NULL; |
||
365 | + } |
||
366 | +} |
||
367 | + |
||
368 | void module_arch_cleanup(struct module *mod) |
||
369 | { |
||
370 | spin_lock_irq(&dbe_lock); |