OpenWrt – Blame information for rev 2
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | From: Felix Fietkau <nbd@nbd.name> |
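2 | Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules. To achieve this, try to load modules into physically contiguous (KSEG0) memory; if that fails, fall back to vmalloc and route out-of-range calls through a jump table |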
||
3 | |||
4 | lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c |
||
5 | Signed-off-by: Felix Fietkau <nbd@nbd.name> |
||
6 | --- |
||
7 | arch/mips/Makefile | 5 + |
||
8 | arch/mips/include/asm/module.h | 5 + |
||
9 | arch/mips/kernel/module.c | 279 ++++++++++++++++++++++++++++++++++++++++- |
||
10 | 3 files changed, 284 insertions(+), 5 deletions(-) |
||
11 | |||
12 | --- a/arch/mips/Makefile |
||
13 | +++ b/arch/mips/Makefile |
||
14 | @@ -93,8 +93,13 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin |
||
15 | cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely |
||
16 | cflags-y += -msoft-float |
||
17 | LDFLAGS_vmlinux += -G 0 -static -n -nostdlib |
||
18 | +ifdef CONFIG_64BIT |
||
19 | KBUILD_AFLAGS_MODULE += -mlong-calls |
||
20 | KBUILD_CFLAGS_MODULE += -mlong-calls |
||
21 | +else |
||
22 | +KBUILD_AFLAGS_MODULE += -mno-long-calls |
||
23 | +KBUILD_CFLAGS_MODULE += -mno-long-calls |
||
24 | +endif |
||
25 | |||
26 | ifeq ($(CONFIG_RELOCATABLE),y) |
||
27 | LDFLAGS_vmlinux += --emit-relocs |
||
28 | --- a/arch/mips/include/asm/module.h |
||
29 | +++ b/arch/mips/include/asm/module.h |
||
30 | @@ -12,6 +12,11 @@ struct mod_arch_specific { |
||
31 | const struct exception_table_entry *dbe_start; |
||
32 | const struct exception_table_entry *dbe_end; |
||
33 | struct mips_hi16 *r_mips_hi16_list; |
||
34 | + |
||
35 | + void *phys_plt_tbl; |
||
36 | + void *virt_plt_tbl; |
||
37 | + unsigned int phys_plt_offset; |
||
38 | + unsigned int virt_plt_offset; |
||
39 | }; |
||
40 | |||
41 | typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */ |
||
42 | --- a/arch/mips/kernel/module.c |
||
43 | +++ b/arch/mips/kernel/module.c |
||
44 | @@ -44,14 +44,221 @@ struct mips_hi16 { |
||
45 | static LIST_HEAD(dbe_list); |
||
46 | static DEFINE_SPINLOCK(dbe_lock); |
||
47 | |||
48 | -#ifdef MODULE_START |
||
49 | +/* |
||
50 | + * Get the potential max trampolines size required of the init and |
||
51 | + * non-init sections. Only used if we cannot find enough contiguous |
||
52 | + * physically mapped memory to put the module into. |
||
53 | + */ |
||
54 | +static unsigned int |
||
55 | +get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, |
||
56 | + const char *secstrings, unsigned int symindex, bool is_init) |
||
57 | +{ |
||
58 | + unsigned long ret = 0; |
||
59 | + unsigned int i, j; |
||
60 | + Elf_Sym *syms; |
||
61 | + |
||
62 | + /* Everything marked ALLOC (this includes the exported symbols) */ |
||
63 | + for (i = 1; i < hdr->e_shnum; ++i) { |
||
64 | + unsigned int info = sechdrs[i].sh_info; |
||
65 | + |
||
66 | + if (sechdrs[i].sh_type != SHT_REL |
||
67 | + && sechdrs[i].sh_type != SHT_RELA) |
||
68 | + continue; |
||
69 | + |
||
70 | + /* Not a valid relocation section? */ |
||
71 | + if (info >= hdr->e_shnum) |
||
72 | + continue; |
||
73 | + |
||
74 | + /* Don't bother with non-allocated sections */ |
||
75 | + if (!(sechdrs[info].sh_flags & SHF_ALLOC)) |
||
76 | + continue; |
||
77 | + |
||
78 | + /* If it's called *.init*, and we're not init, we're |
||
79 | + not interested */ |
||
80 | + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) |
||
81 | + != is_init) |
||
82 | + continue; |
||
83 | + |
||
84 | + syms = (Elf_Sym *) sechdrs[symindex].sh_addr; |
||
85 | + if (sechdrs[i].sh_type == SHT_REL) { |
||
86 | + Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr; |
||
87 | + unsigned int size = sechdrs[i].sh_size / sizeof(*rel); |
||
88 | + |
||
89 | + for (j = 0; j < size; ++j) { |
||
90 | + Elf_Sym *sym; |
||
91 | + |
||
92 | + if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26) |
||
93 | + continue; |
||
94 | + |
||
95 | + sym = syms + ELF_MIPS_R_SYM(rel[j]); |
||
96 | + if (!is_init && sym->st_shndx != SHN_UNDEF) |
||
97 | + continue; |
||
98 | + |
||
99 | + ret += 4 * sizeof(int); |
||
100 | + } |
||
101 | + } else { |
||
102 | + Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr; |
||
103 | + unsigned int size = sechdrs[i].sh_size / sizeof(*rela); |
||
104 | + |
||
105 | + for (j = 0; j < size; ++j) { |
||
106 | + Elf_Sym *sym; |
||
107 | + |
||
108 | + if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26) |
||
109 | + continue; |
||
110 | + |
||
111 | + sym = syms + ELF_MIPS_R_SYM(rela[j]); |
||
112 | + if (!is_init && sym->st_shndx != SHN_UNDEF) |
||
113 | + continue; |
||
114 | + |
||
115 | + ret += 4 * sizeof(int); |
||
116 | + } |
||
117 | + } |
||
118 | + } |
||
119 | + |
||
120 | + return ret; |
||
121 | +} |
||
122 | + |
||
123 | +#ifndef MODULE_START |
||
124 | +static void *alloc_phys(unsigned long size) |
||
125 | +{ |
||
126 | + unsigned order; |
||
127 | + struct page *page; |
||
128 | + struct page *p; |
||
129 | + |
||
130 | + size = PAGE_ALIGN(size); |
||
131 | + order = get_order(size); |
||
132 | + |
||
133 | + page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN | |
||
134 | + __GFP_THISNODE, order); |
||
135 | + if (!page) |
||
136 | + return NULL; |
||
137 | + |
||
138 | + split_page(page, order); |
||
139 | + |
||
140 | + /* mark all pages except for the last one */ |
||
141 | + for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p) |
||
142 | + set_bit(PG_owner_priv_1, &p->flags); |
||
143 | + |
||
144 | + for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p) |
||
145 | + __free_page(p); |
||
146 | + |
||
147 | + return page_address(page); |
||
148 | +} |
||
149 | +#endif |
||
150 | + |
||
151 | +static void free_phys(void *ptr) |
||
152 | +{ |
||
153 | + struct page *page; |
||
154 | + bool free; |
||
155 | + |
||
156 | + page = virt_to_page(ptr); |
||
157 | + do { |
||
158 | + free = test_and_clear_bit(PG_owner_priv_1, &page->flags); |
||
159 | + __free_page(page); |
||
160 | + page++; |
||
161 | + } while (free); |
||
162 | +} |
||
163 | + |
||
164 | + |
||
165 | void *module_alloc(unsigned long size) |
||
166 | { |
||
167 | +#ifdef MODULE_START |
||
168 | return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END, |
||
169 | GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, |
||
170 | __builtin_return_address(0)); |
||
171 | +#else |
||
172 | + void *ptr; |
||
173 | + |
||
174 | + if (size == 0) |
||
175 | + return NULL; |
||
176 | + |
||
177 | + ptr = alloc_phys(size); |
||
178 | + |
||
179 | + /* If we failed to allocate physically contiguous memory, |
||
180 | + * fall back to regular vmalloc. The module loader code will |
||
181 | + * create jump tables to handle long jumps */ |
||
182 | + if (!ptr) |
||
183 | + return vmalloc(size); |
||
184 | + |
||
185 | + return ptr; |
||
186 | +#endif |
||
187 | } |
||
188 | + |
||
189 | +static inline bool is_phys_addr(void *ptr) |
||
190 | +{ |
||
191 | +#ifdef CONFIG_64BIT |
||
192 | + return (KSEGX((unsigned long)ptr) == CKSEG0); |
||
193 | +#else |
||
194 | + return (KSEGX(ptr) == KSEG0); |
||
195 | #endif |
||
196 | +} |
||
197 | + |
||
198 | +/* Free memory returned from module_alloc */ |
||
199 | +void module_memfree(void *module_region) |
||
200 | +{ |
||
201 | + if (is_phys_addr(module_region)) |
||
202 | + free_phys(module_region); |
||
203 | + else |
||
204 | + vfree(module_region); |
||
205 | +} |
||
206 | + |
||
207 | +static void *__module_alloc(int size, bool phys) |
||
208 | +{ |
||
209 | + void *ptr; |
||
210 | + |
||
211 | + if (phys) |
||
212 | + ptr = kmalloc(size, GFP_KERNEL); |
||
213 | + else |
||
214 | + ptr = vmalloc(size); |
||
215 | + return ptr; |
||
216 | +} |
||
217 | + |
||
218 | +static void __module_free(void *ptr) |
||
219 | +{ |
||
220 | + if (is_phys_addr(ptr)) |
||
221 | + kfree(ptr); |
||
222 | + else |
||
223 | + vfree(ptr); |
||
224 | +} |
||
225 | + |
||
226 | +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, |
||
227 | + char *secstrings, struct module *mod) |
||
228 | +{ |
||
229 | + unsigned int symindex = 0; |
||
230 | + unsigned int core_size, init_size; |
||
231 | + int i; |
||
232 | + |
||
233 | + mod->arch.phys_plt_offset = 0; |
||
234 | + mod->arch.virt_plt_offset = 0; |
||
235 | + mod->arch.phys_plt_tbl = NULL; |
||
236 | + mod->arch.virt_plt_tbl = NULL; |
||
237 | + |
||
238 | + if (IS_ENABLED(CONFIG_64BIT)) |
||
239 | + return 0; |
||
240 | + |
||
241 | + for (i = 1; i < hdr->e_shnum; i++) |
||
242 | + if (sechdrs[i].sh_type == SHT_SYMTAB) |
||
243 | + symindex = i; |
||
244 | + |
||
245 | + core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false); |
||
246 | + init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true); |
||
247 | + |
||
248 | + if ((core_size + init_size) == 0) |
||
249 | + return 0; |
||
250 | + |
||
251 | + mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1); |
||
252 | + if (!mod->arch.phys_plt_tbl) |
||
253 | + return -ENOMEM; |
||
254 | + |
||
255 | + mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0); |
||
256 | + if (!mod->arch.virt_plt_tbl) { |
||
257 | + __module_free(mod->arch.phys_plt_tbl); |
||
258 | + mod->arch.phys_plt_tbl = NULL; |
||
259 | + return -ENOMEM; |
||
260 | + } |
||
261 | + |
||
262 | + return 0; |
||
263 | +} |
||
264 | |||
265 | static int apply_r_mips_none(struct module *me, u32 *location, |
||
266 | u32 base, Elf_Addr v, bool rela) |
||
267 | @@ -67,9 +274,40 @@ static int apply_r_mips_32(struct module |
||
268 | return 0; |
||
269 | } |
||
270 | |||
271 | +static Elf_Addr add_plt_entry_to(unsigned *plt_offset, |
||
272 | + void *start, Elf_Addr v) |
||
273 | +{ |
||
274 | + unsigned *tramp = start + *plt_offset; |
||
275 | + *plt_offset += 4 * sizeof(int); |
||
276 | + |
||
277 | + /* adjust carry for addiu */ |
||
278 | + if (v & 0x00008000) |
||
279 | + v += 0x10000; |
||
280 | + |
||
281 | + tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */ |
||
282 | + tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */ |
||
283 | + tramp[2] = 0x03200008; /* jr t9 */ |
||
284 | + tramp[3] = 0x00000000; /* nop */ |
||
285 | + |
||
286 | + return (Elf_Addr) tramp; |
||
287 | +} |
||
288 | + |
||
289 | +static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v) |
||
290 | +{ |
||
291 | + if (is_phys_addr(location)) |
||
292 | + return add_plt_entry_to(&me->arch.phys_plt_offset, |
||
293 | + me->arch.phys_plt_tbl, v); |
||
294 | + else |
||
295 | + return add_plt_entry_to(&me->arch.virt_plt_offset, |
||
296 | + me->arch.virt_plt_tbl, v); |
||
297 | + |
||
298 | +} |
||
299 | + |
||
300 | static int apply_r_mips_26(struct module *me, u32 *location, |
||
301 | u32 base, Elf_Addr v, bool rela) |
||
302 | { |
||
303 | + u32 ofs = base & 0x03ffffff; |
||
304 | + |
||
305 | if (v % 4) { |
||
306 | pr_err("module %s: dangerous R_MIPS_26 relocation\n", |
||
307 | me->name); |
||
308 | @@ -77,13 +315,17 @@ static int apply_r_mips_26(struct module |
||
309 | } |
||
310 | |||
311 | if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) { |
||
312 | - pr_err("module %s: relocation overflow\n", |
||
313 | - me->name); |
||
314 | - return -ENOEXEC; |
||
315 | + v = add_plt_entry(me, location, v + (ofs << 2)); |
||
316 | + if (!v) { |
||
317 | + pr_err("module %s: relocation overflow\n", |
||
318 | + me->name); |
||
319 | + return -ENOEXEC; |
||
320 | + } |
||
321 | + ofs = 0; |
||
322 | } |
||
323 | |||
324 | *location = (*location & ~0x03ffffff) | |
||
325 | - ((base + (v >> 2)) & 0x03ffffff); |
||
326 | + ((ofs + (v >> 2)) & 0x03ffffff); |
||
327 | |||
328 | return 0; |
||
329 | } |
||
330 | @@ -459,9 +701,36 @@ int module_finalize(const Elf_Ehdr *hdr, |
||
331 | list_add(&me->arch.dbe_list, &dbe_list); |
||
332 | spin_unlock_irq(&dbe_lock); |
||
333 | } |
||
334 | + |
||
335 | + /* Get rid of the fixup trampoline if we're running the module |
||
336 | + * from physically mapped address space */ |
||
337 | + if (me->arch.phys_plt_offset == 0) { |
||
338 | + __module_free(me->arch.phys_plt_tbl); |
||
339 | + me->arch.phys_plt_tbl = NULL; |
||
340 | + } |
||
341 | + if (me->arch.virt_plt_offset == 0) { |
||
342 | + __module_free(me->arch.virt_plt_tbl); |
||
343 | + me->arch.virt_plt_tbl = NULL; |
||
344 | + } |
||
345 | + |
||
346 | return 0; |
||
347 | } |
||
348 | |||
349 | +void module_arch_freeing_init(struct module *mod) |
||
350 | +{ |
||
351 | + if (mod->state == MODULE_STATE_LIVE) |
||
352 | + return; |
||
353 | + |
||
354 | + if (mod->arch.phys_plt_tbl) { |
||
355 | + __module_free(mod->arch.phys_plt_tbl); |
||
356 | + mod->arch.phys_plt_tbl = NULL; |
||
357 | + } |
||
358 | + if (mod->arch.virt_plt_tbl) { |
||
359 | + __module_free(mod->arch.virt_plt_tbl); |
||
360 | + mod->arch.virt_plt_tbl = NULL; |
||
361 | + } |
||
362 | +} |
||
363 | + |
||
364 | void module_arch_cleanup(struct module *mod) |
||
365 | { |
||
366 | spin_lock_irq(&dbe_lock); |