/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *  Optimization module for tcpdump intermediate representation.
 */
23 | |||
24 | #ifdef HAVE_CONFIG_H |
||
25 | #include "config.h" |
||
26 | #endif |
||
27 | |||
28 | #ifdef WIN32 |
||
29 | #include <pcap-stdinc.h> |
||
30 | #else /* WIN32 */ |
||
31 | #if HAVE_INTTYPES_H |
||
32 | #include <inttypes.h> |
||
33 | #elif HAVE_STDINT_H |
||
34 | #include <stdint.h> |
||
35 | #endif |
||
36 | #ifdef HAVE_SYS_BITYPES_H |
||
37 | #include <sys/bitypes.h> |
||
38 | #endif |
||
39 | #include <sys/types.h> |
||
40 | #endif /* WIN32 */ |
||
41 | |||
42 | #include <stdio.h> |
||
43 | #include <stdlib.h> |
||
44 | #include <memory.h> |
||
45 | #include <string.h> |
||
46 | |||
47 | #include <errno.h> |
||
48 | |||
49 | #include "pcap-int.h" |
||
50 | |||
51 | #include "gencode.h" |
||
52 | |||
53 | #ifdef HAVE_OS_PROTO_H |
||
54 | #include "os-proto.h" |
||
55 | #endif |
||
56 | |||
57 | #ifdef BDEBUG |
||
58 | extern int dflag; |
||
59 | #endif |
||
60 | |||
61 | #if defined(MSDOS) && !defined(__DJGPP__) |
||
62 | extern int _w32_ffs (int mask); |
||
63 | #define ffs _w32_ffs |
||
64 | #endif |
||
65 | |||
66 | #if defined(WIN32) && defined (_MSC_VER) |
||
67 | int ffs(int mask); |
||
68 | #endif |
||
69 | |||
/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;
/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)
static void opt_init(struct block *);
static void opt_cleanup(void);

static void intern_blocks(struct block *);

static void find_inedges(struct block *);
#ifdef BDEBUG
static void opt_dump(struct block *);
#endif

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
static int nodewords;
static int edgewords;
struct block **levels;
bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if 'a' is in uset 'p'.
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset 'p'.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset 'p'.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(struct block *b)
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(struct block *root)
{
	memset((char *)levels, 0, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(struct block *root)
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}

static void
propedom(struct edge *ep)
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(struct block *root)
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(struct block *root)
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)all_closure_sets, 0,
	      n_blocks * nodewords * sizeof(*all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, nodewords);
			SET_UNION(JF(b)->closure, b->closure, nodewords);
		}
	}
}

/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(struct stmt *s)
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(struct stmt *s)
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}

/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
static void
compute_local_ud(struct block *b)
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
	}

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}

/*
 * Assume graph is already leveled.
 */
static void
find_ud(struct block *root)
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val(void)
{
	curval = 0;
	next_vnode = vnode_base;
	memset((char *)vmap, 0, maxval * sizeof(*vmap));
	memset((char *)hashtbl, 0, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(int code, int v0, int v1)
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		vmap[val].const_val = v0;
		vmap[val].is_const = 1;
	}
	p = next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = hashtbl[hash];
	hashtbl[hash] = p;

	return val;
}

static inline void
vstore(struct stmt *s, int *valp, int newval, int alter)
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 */
static void
fold_op(struct stmt *s, int v0, int v1)
{
	bpf_u_int32 a, b;

	a = vmap[v0].const_val;
	b = vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error("division by zero");
		a /= b;
		break;

	case BPF_MOD:
		if (b == 0)
			bpf_error("modulus by zero");
		a %= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_XOR:
		a ^= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	done = 0;
}

static inline struct slist *
this_op(struct slist *s)
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(struct block *b)
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(struct block *b)
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	for (/*empty*/; /*empty*/; s = next) {
		/*
		 * Skip over nops.
		 */
		s = this_op(s);
		if (s == 0)
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one
		 * (skipping nops).
		 */
		next = this_op(s->next);
		if (next == 0)
			break;	/* no next instruction */
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison:
				 *
				 * sub x	->	nop
				 * jeq #y		jeq #(x+y)
				 */
				b->s.k += vmap[val].const_val;
				last->s.code = NOP;
				done = 0;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 *
				 * sub x	->	nop
				 * jeq #0		jeq x
				 */
				last->s.code = NOP;
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
				done = 0;
			}
		}
		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 * sub #x	->	nop
		 * jeq #y	->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
			last->s.code = NOP;
			b->s.k += last->s.k;
			done = 0;
		}
		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 *
		 * and #k	nop
		 * jeq #0  ->	jset #k
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
		    b->s.k == 0) {
			b->s.k = last->s.k;
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
			last->s.code = NOP;
			done = 0;
			opt_not(b);
		}
	}
	/*
	 * jset #0        ->   never
	 * jset #ffffffff ->   always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if (b->s.k == 0)
			JT(b) = JF(b);
		if (b->s.k == 0xffffffff)
			JF(b) = JT(b);
	}
	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = vmap[val].const_val;
		b->s.code &= ~BPF_X;
		b->s.k = v;
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}

/*
 * Compute the symbolic value of the expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
static void
opt_stmt(struct stmt *s, int val[], int alter)
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_MOD|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_XOR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/* don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR || op == BPF_XOR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_MOD|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_XOR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}

static void
deadstmt(register struct stmt *s, register struct stmt *last[])
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(register struct block *b)
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}

static void
opt_blk(struct block *b, int do_stmts)
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
	      xval != 0 && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			done = 0;
		}
	} else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(struct block *b, struct block *succ)
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

static struct block *
fold_edge(struct block *child, struct edge *ep)
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}

static void
opt_j(struct edge *ep)
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &=~ (1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}

static void
or_pullup(struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
and_pullup(struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
opt_blks(struct block *root, int do_stmts)
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;

	find_inedges(root);
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}

	find_inedges(root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

static inline void
link_inedge(struct edge *parent, struct block *child)
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(struct block *root)
{
	int i;
	struct block *b;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

static void
opt_root(struct block **b)
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}

static void
opt_loop(struct block *root, int do_stmts)
{

#ifdef BDEBUG
	if (dflag > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(root);
	}
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
			opt_dump(root);
		}
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(struct block **rootp)
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after intern_blocks()\n");
		opt_dump(root);
	}
#endif
	opt_root(rootp);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after opt_root()\n");
		opt_dump(root);
	}
#endif
	opt_cleanup();
}

static void
make_marks(struct block *p)
{
	if (!isMarked(p)) {
		Mark(p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(JT(p));
			make_marks(JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(i) is true
 * only for nodes that are alive.
 */
static void
mark_code(struct block *p)
{
	cur_mark += 1;
	make_marks(p);
}

/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(struct slist *x, struct slist *y)
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

static inline int
eq_blk(struct block *b0, struct block *b1)
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

static void
intern_blocks(struct block *root)
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}

static void
opt_cleanup(void)
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static u_int
slength(struct slist *s)
{
	u_int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(struct block *p)
{
	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(struct block *p)
{
	int n;

	if (p == 0 || isMarked(p))
		return;

	Mark(p);
	n = n_blocks++;
	p->id = n;
	blocks[n] = p;

	number_blks_r(JT(p));
	number_blks_r(JF(p));
}

1871 | /* |
||
1872 | * Return the number of stmts in the flowgraph reachable by 'p'. |
||
1873 | * The nodes should be unmarked before calling. |
||
1874 | * |
||
1875 | * Note that "stmts" means "instructions", and that this includes |
||
1876 | * |
||
1877 | * side-effect statements in 'p' (slength(p->stmts)); |
||
1878 | * |
||
1879 | * statements in the true branch from 'p' (count_stmts(JT(p))); |
||
1880 | * |
||
1881 | * statements in the false branch from 'p' (count_stmts(JF(p))); |
||
1882 | * |
||
1883 | * the conditional jump itself (1); |
||
1884 | * |
||
1885 | * an extra long jump if the true branch requires it (p->longjt); |
||
1886 | * |
||
1887 | * an extra long jump if the false branch requires it (p->longjf). |
||
1888 | */ |
||
1889 | static u_int |
||
1890 | count_stmts(struct block *p) |
||
1891 | { |
||
1892 | u_int n; |
||
1893 | |||
1894 | if (p == 0 || isMarked(p)) |
||
1895 | return 0; |
||
1896 | Mark(p); |
||
1897 | n = count_stmts(JT(p)) + count_stmts(JF(p)); |
||
1898 | return slength(p->stmts) + n + 1 + p->longjt + p->longjf; |
||
1899 | } |
||
1900 | |||
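A worked instance of the counting rule above, with hypothetical block sizes: a root block of 2 non-NOP stmts branching to two distinct leaf blocks of 1 stmt each, with no long jumps, yields 7 instructions, since each reachable block is counted once even if both paths reach it:

#include <stdio.h>

int
main(void)
{
	/* per-block contribution: slength + 1 + longjt + longjf */
	unsigned int root = 2 + 1 + 0 + 0;
	unsigned int leaf_t = 1 + 1 + 0 + 0;
	unsigned int leaf_f = 1 + 1 + 0 + 0;

	printf("%u instructions\n", root + leaf_t + leaf_f);	/* 7 */
	return 0;
}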
1901 | /* |
||
1902 | * Allocate memory. All allocation is done before optimization |
||
1903 | * is begun. A linear bound on the size of all data structures is computed |
||
1904 | * from the total number of blocks and/or statements. |
||
1905 | */ |
||
1906 | static void |
||
1907 | opt_init(struct block *root) |
||
1908 | { |
||
1909 | bpf_u_int32 *p; |
||
1910 | int i, n, max_stmts; |
||
1911 | |||
1912 | /* |
||
1913 | * First, count the blocks, so we can malloc an array to map |
||
1914 | * block number to block. Then, put the blocks into the array. |
||
1915 | */ |
||
1916 | unMarkAll(); |
||
1917 | n = count_blocks(root); |
||
1918 | blocks = (struct block **)calloc(n, sizeof(*blocks)); |
||
1919 | if (blocks == NULL) |
||
1920 | bpf_error("malloc"); |
||
1921 | unMarkAll(); |
||
1922 | n_blocks = 0; |
||
1923 | number_blks_r(root); |
||
1924 | |||
1925 | n_edges = 2 * n_blocks; |
||
1926 | edges = (struct edge **)calloc(n_edges, sizeof(*edges)); |
||
1927 | if (edges == NULL) |
||
1928 | bpf_error("malloc"); |
||
1929 | |||
1930 | /* |
||
1931 | * The number of levels is bounded by the number of nodes. |
||
1932 | */ |
||
1933 | levels = (struct block **)calloc(n_blocks, sizeof(*levels)); |
||
1934 | if (levels == NULL) |
||
1935 | bpf_error("malloc"); |
||
1936 | |||
1937 | edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1; |
||
1938 | nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1; |
||
1939 | |||
1940 | /* XXX */ |
||
1941 | space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space) |
||
1942 | + n_edges * edgewords * sizeof(*space)); |
||
1943 | if (space == NULL) |
||
1944 | bpf_error("malloc"); |
||
1945 | p = space; |
||
1946 | all_dom_sets = p; |
||
1947 | for (i = 0; i < n; ++i) { |
||
1948 | blocks[i]->dom = p; |
||
1949 | p += nodewords; |
||
1950 | } |
||
1951 | all_closure_sets = p; |
||
1952 | for (i = 0; i < n; ++i) { |
||
1953 | blocks[i]->closure = p; |
||
1954 | p += nodewords; |
||
1955 | } |
||
1956 | all_edge_sets = p; |
||
1957 | for (i = 0; i < n; ++i) { |
||
1958 | register struct block *b = blocks[i]; |
||
1959 | |||
1960 | b->et.edom = p; |
||
1961 | p += edgewords; |
||
1962 | b->ef.edom = p; |
||
1963 | p += edgewords; |
||
1964 | b->et.id = i; |
||
1965 | edges[i] = &b->et; |
||
1966 | b->ef.id = n_blocks + i; |
||
1967 | edges[n_blocks + i] = &b->ef; |
||
1968 | b->et.pred = b; |
||
1969 | b->ef.pred = b; |
||
1970 | } |
||
1971 | max_stmts = 0; |
||
1972 | for (i = 0; i < n; ++i) |
||
1973 | max_stmts += slength(blocks[i]->stmts) + 1; |
||
1974 | /* |
||
1975 | * We allocate at most 3 value numbers per statement, |
||
1976 | * so this is an upper bound on the number of valnodes |
||
1977 | * we'll need. |
||
1978 | */ |
||
1979 | maxval = 3 * max_stmts; |
||
1980 | vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap)); |
||
1981 | vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base)); |
||
1982 | if (vmap == NULL || vnode_base == NULL) |
||
1983 | bpf_error("malloc"); |
||
1984 | } |
||
1985 | |||
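As an illustration of the sizing above (counts hypothetical): the dominator, closure, and edge-dominator sets are bitsets, one bit per block or edge, stored as whole bpf_u_int32 words; dividing by the 32 bits per word and adding 1 rounds up, over-allocating one word when the count is an exact multiple:

#include <stdio.h>

typedef unsigned int bpf_u_int32;

int
main(void)
{
	int n_blocks = 40;		/* hypothetical CFG size */
	int n_edges = 2 * n_blocks;	/* every block has two out-edges */
	int nodewords = (int)(n_blocks / (8 * sizeof(bpf_u_int32)) + 1);
	int edgewords = (int)(n_edges / (8 * sizeof(bpf_u_int32)) + 1);

	/* 40 bits -> 2 words, 80 bits -> 3 words */
	printf("nodewords=%d edgewords=%d\n", nodewords, edgewords);
	return 0;
}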
1986 | /* |
||
1987 | * Some pointers used to convert the basic block form of the code |
||
1988 | * into the array form that BPF requires. 'fstart' will point to |
||
1989 | * the malloc'd array while 'ftail' is used during the recursive traversal. |
||
1990 | */ |
||
1991 | static struct bpf_insn *fstart; |
||
1992 | static struct bpf_insn *ftail; |
||
1993 | |||
1994 | #ifdef BDEBUG |
||
1995 | int bids[1000]; |
||
1996 | #endif |
||
1997 | |||
1998 | /* |
||
1999 | * Returns true if successful. Returns false if a branch has |
||
2000 | * an offset that is too large. If so, we have marked that |
||
2001 | * branch so that on a subsequent iteration, it will be treated |
||
2002 | * properly. |
||
2003 | */ |
||
2004 | static int |
||
2005 | convert_code_r(struct block *p) |
||
2006 | { |
||
2007 | struct bpf_insn *dst; |
||
2008 | struct slist *src; |
||
2009 | int slen; |
||
2010 | u_int off; |
||
2011 | int extrajmps; /* number of extra jumps inserted */ |
||
2012 | struct slist **offset = NULL; |
||
2013 | |||
2014 | if (p == 0 || isMarked(p)) |
||
2015 | return (1); |
||
2016 | Mark(p); |
||
2017 | |||
2018 | if (convert_code_r(JF(p)) == 0) |
||
2019 | return (0); |
||
2020 | if (convert_code_r(JT(p)) == 0) |
||
2021 | return (0); |
||
2022 | |||
2023 | slen = slength(p->stmts); |
||
2024 | dst = ftail -= (slen + 1 + p->longjt + p->longjf); |
||
2025 | /* inflate length by any extra jumps */ |
||
2026 | |||
2027 | p->offset = dst - fstart; |
||
2028 | |||
2029 | /* generate offset[] for convenience */ |
||
2030 | if (slen) { |
||
2031 | offset = (struct slist **)calloc(slen, sizeof(struct slist *)); |
||
2032 | if (!offset) { |
||
2033 | bpf_error("not enough core"); |
||
2034 | /*NOTREACHED*/ |
||
2035 | } |
||
2036 | } |
||
2037 | src = p->stmts; |
||
2038 | for (off = 0; off < slen && src; off++) { |
||
2039 | #if 0 |
||
2040 | printf("off=%d src=%x\n", off, src); |
||
2041 | #endif |
||
2042 | offset[off] = src; |
||
2043 | src = src->next; |
||
2044 | } |
||
2045 | |||
2046 | off = 0; |
||
2047 | for (src = p->stmts; src; src = src->next) { |
||
2048 | if (src->s.code == NOP) |
||
2049 | continue; |
||
2050 | dst->code = (u_short)src->s.code; |
||
2051 | dst->k = src->s.k; |
||
2052 | |||
2053 | /* fill block-local relative jump */ |
||
2054 | if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) { |
||
2055 | #if 0 |
||
2056 | if (src->s.jt || src->s.jf) { |
||
2057 | bpf_error("illegal jmp destination"); |
||
2058 | /*NOTREACHED*/ |
||
2059 | } |
||
2060 | #endif |
||
2061 | goto filled; |
||
2062 | } |
||
2063 | if (off == slen - 2) /*???*/ |
||
2064 | goto filled; |
||
2065 | |||
2066 | { |
||
2067 | int i; |
||
2068 | int jt, jf; |
||
2069 | const char *ljerr = "%s for block-local relative jump: off=%d"; |
||
2070 | |||
2071 | #if 0 |
||
2072 | printf("code=%x off=%d %x %x\n", src->s.code, |
||
2073 | off, src->s.jt, src->s.jf); |
||
2074 | #endif |
||
2075 | |||
2076 | if (!src->s.jt || !src->s.jf) { |
||
2077 | bpf_error(ljerr, "no jmp destination", off); |
||
2078 | /*NOTREACHED*/ |
||
2079 | } |
||
2080 | |||
2081 | jt = jf = 0; |
||
2082 | for (i = 0; i < slen; i++) { |
||
2083 | if (offset[i] == src->s.jt) { |
||
2084 | if (jt) { |
||
2085 | bpf_error(ljerr, "multiple matches", off); |
||
2086 | /*NOTREACHED*/ |
||
2087 | } |
||
2088 | |||
2089 | dst->jt = i - off - 1; |
||
2090 | jt++; |
||
2091 | } |
||
2092 | if (offset[i] == src->s.jf) { |
||
2093 | if (jf) { |
||
2094 | bpf_error(ljerr, "multiple matches", off); |
||
2095 | /*NOTREACHED*/ |
||
2096 | } |
||
2097 | dst->jf = i - off - 1; |
||
2098 | jf++; |
||
2099 | } |
||
2100 | } |
||
2101 | if (!jt || !jf) { |
||
2102 | bpf_error(ljerr, "no destination found", off); |
||
2103 | /*NOTREACHED*/ |
||
2104 | } |
||
2105 | } |
||
2106 | filled: |
||
2107 | ++dst; |
||
2108 | ++off; |
||
2109 | } |
||
2110 | if (offset) |
||
2111 | free(offset); |
||
2112 | |||
2113 | #ifdef BDEBUG |
||
2114 | bids[dst - fstart] = p->id + 1; |
||
2115 | #endif |
||
2116 | dst->code = (u_short)p->s.code; |
||
2117 | dst->k = p->s.k; |
||
2118 | if (JT(p)) { |
||
2119 | extrajmps = 0; |
||
2120 | off = JT(p)->offset - (p->offset + slen) - 1; |
||
2121 | if (off >= 256) { |
||
2122 | /* offset too large for branch, must add a jump */ |
||
2123 | if (p->longjt == 0) { |
||
2124 | /* mark this instruction and retry */ |
||
2125 | p->longjt++; |
||
2126 | return(0); |
||
2127 | } |
||
2128 | /* branch if T to following jump */ |
||
2129 | dst->jt = extrajmps; |
||
2130 | extrajmps++; |
||
2131 | dst[extrajmps].code = BPF_JMP|BPF_JA; |
||
2132 | dst[extrajmps].k = off - extrajmps; |
||
2133 | } |
||
2134 | else |
||
2135 | dst->jt = off; |
||
2136 | off = JF(p)->offset - (p->offset + slen) - 1; |
||
2137 | if (off >= 256) { |
||
2138 | /* offset too large for branch, must add a jump */ |
||
2139 | if (p->longjf == 0) { |
||
2140 | /* mark this instruction and retry */ |
||
2141 | p->longjf++; |
||
2142 | return(0); |
||
2143 | } |
||
2144 | /* branch if F to following jump */ |
||
2145 | /* if two jumps are inserted, F goes to second one */ |
||
2146 | dst->jf = extrajmps; |
||
2147 | extrajmps++; |
||
2148 | dst[extrajmps].code = BPF_JMP|BPF_JA; |
||
2149 | dst[extrajmps].k = off - extrajmps; |
||
2150 | } |
||
2151 | else |
||
2152 | dst->jf = off; |
||
2153 | } |
||
2154 | return (1); |
||
2155 | } |
||
2156 | |||
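A sketch of the overflow path above (helper name hypothetical; struct bpf_insn as declared in pcap/bpf.h): the jt/jf fields of a conditional branch are 8 bits wide, so a target 256 or more instructions away is reached by branching to an unconditional BPF_JA placed right after the branch, with the JA's operand shortened by the slot the JA itself occupies:

#include <pcap/bpf.h>

/* Hypothetical helper, not part of optimize.c. */
void
emit_long_true_branch(struct bpf_insn *dst, unsigned int off)
{
	dst->jt = 0;			/* take the JA immediately below */
	dst[1].code = BPF_JMP|BPF_JA;
	dst[1].k = off - 1;		/* the JA slot consumes one step */
}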
2157 | |||
2158 | /* |
||
2159 | * Convert flowgraph intermediate representation to the |
||
2160 | * BPF array representation. Set *lenp to the number of instructions. |
||
2161 | * |
||
2162 | * This routine does *NOT* leak the memory pointed to by fp. It *must |
||
2163 | * not* do free(fp) before returning fp; doing so would make no sense, |
||
2164 | * as the BPF array pointed to by the return value of icode_to_fcode() |
||
2165 | * must be valid - it's being returned for use in a bpf_program structure. |
||
2166 | * |
||
2167 | * If it appears that icode_to_fcode() is leaking, the problem is that |
||
2168 | * the program using pcap_compile() is failing to free the memory in |
||
2169 | * the BPF program when it's done - the leak is in the program, not in |
||
2170 | * the routine that happens to be allocating the memory. (By analogy, if |
||
2171 | * a program calls fopen() without ever calling fclose() on the FILE *, |
||
2172 | * it will leak the FILE structure; the leak is not in fopen(), it's in |
||
2173 | * the program.) Change the program to use pcap_freecode() when it's |
||
2174 | * done with the filter program. See the pcap man page. |
||
2175 | */ |
||
2176 | struct bpf_insn * |
||
2177 | icode_to_fcode(struct block *root, u_int *lenp) |
||
2178 | { |
||
2179 | u_int n; |
||
2180 | struct bpf_insn *fp; |
||
2181 | |||
2182 | /* |
||
2183 | * Loop doing convert_code_r() until no branches remain |
||
2184 | * with too-large offsets. |
||
2185 | */ |
||
2186 | while (1) { |
||
2187 | unMarkAll(); |
||
2188 | n = *lenp = count_stmts(root); |
||
2189 | |||
2190 | fp = (struct bpf_insn *)malloc(sizeof(*fp) * n); |
||
2191 | if (fp == NULL) |
||
2192 | bpf_error("malloc"); |
||
2193 | memset((char *)fp, 0, sizeof(*fp) * n); |
||
2194 | fstart = fp; |
||
2195 | ftail = fp + n; |
||
2196 | |||
2197 | unMarkAll(); |
||
2198 | if (convert_code_r(root)) |
||
2199 | break; |
||
2200 | free(fp); |
||
2201 | } |
||
2202 | |||
2203 | return fp; |
||
2204 | } |
||
2205 | |||
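To make the ownership contract above concrete, a hedged caller-side sketch (the pcap_t is assumed already opened; the filter string reuses the example from the DOT comment below; a netmask of 0 is passed for brevity):

#include <pcap.h>
#include <stdio.h>

int
apply_filter(pcap_t *p)
{
	struct bpf_program prog;

	if (pcap_compile(p, &prog, "ip src host 1.1.1.1", 1, 0) < 0) {
		fprintf(stderr, "compile: %s\n", pcap_geterr(p));
		return -1;
	}
	if (pcap_setfilter(p, &prog) < 0) {
		fprintf(stderr, "setfilter: %s\n", pcap_geterr(p));
		pcap_freecode(&prog);
		return -1;
	}
	pcap_freecode(&prog);	/* the fclose() of the analogy above */
	return 0;
}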
2206 | /* |
||
2207 | * Make a copy of a BPF program and put it in the "fcode" member of |
||
2208 | * a "pcap_t". |
||
2209 | * |
||
2210 | * If we fail to allocate memory for the copy, fill in the "errbuf" |
||
2211 | * member of the "pcap_t" with an error message, and return -1; |
||
2212 | * otherwise, return 0. |
||
2213 | */ |
||
2214 | int |
||
2215 | install_bpf_program(pcap_t *p, struct bpf_program *fp) |
||
2216 | { |
||
2217 | size_t prog_size; |
||
2218 | |||
2219 | /* |
||
2220 | * Validate the program. |
||
2221 | */ |
||
2222 | if (!bpf_validate(fp->bf_insns, fp->bf_len)) { |
||
2223 | snprintf(p->errbuf, sizeof(p->errbuf), |
||
2224 | "BPF program is not valid"); |
||
2225 | return (-1); |
||
2226 | } |
||
2227 | |||
2228 | /* |
||
2229 | * Free up any already installed program. |
||
2230 | */ |
||
2231 | pcap_freecode(&p->fcode); |
||
2232 | |||
2233 | prog_size = sizeof(*fp->bf_insns) * fp->bf_len; |
||
2234 | p->fcode.bf_len = fp->bf_len; |
||
2235 | p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size); |
||
2236 | if (p->fcode.bf_insns == NULL) { |
||
2237 | snprintf(p->errbuf, sizeof(p->errbuf), |
||
2238 | "malloc: %s", pcap_strerror(errno)); |
||
2239 | return (-1); |
||
2240 | } |
||
2241 | memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size); |
||
2242 | return (0); |
||
2243 | } |
||
2244 | |||
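A hedged sketch of a typical caller: a capture module's setfilter hook (names hypothetical) that filters in userland simply delegates to the helper above, which validates, frees any old program, and deep-copies the new one into p->fcode:

#include "pcap-int.h"

static int
mydev_setfilter(pcap_t *p, struct bpf_program *fp)
{
	if (install_bpf_program(p, fp) < 0)
		return (-1);	/* p->errbuf has been filled in */
	return (0);
}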
2245 | #ifdef BDEBUG |
||
2246 | static void |
||
2247 | dot_dump_node(struct block *block, struct bpf_program *prog, FILE *out) |
||
2248 | { |
||
2249 | int icount, noffset; |
||
2250 | int i; |
||
2251 | |||
2252 | if (block == NULL || isMarked(block)) |
||
2253 | return; |
||
2254 | Mark(block); |
||
2255 | |||
2256 | icount = slength(block->stmts) + 1 + block->longjt + block->longjf; |
||
2257 | noffset = min(block->offset + icount, (int)prog->bf_len); |
||
2258 | |||
2259 | fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id); |
||
2260 | for (i = block->offset; i < noffset; i++) { |
||
2261 | fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i)); |
||
2262 | } |
||
2263 | fprintf(out, "\" tooltip=\""); |
||
2264 | for (i = 0; i < BPF_MEMWORDS; i++) |
||
2265 | if (block->val[i] != 0) |
||
2266 | fprintf(out, "val[%d]=%d ", i, block->val[i]); |
||
2267 | fprintf(out, "val[A]=%d ", block->val[A_ATOM]); |
||
2268 | fprintf(out, "val[X]=%d", block->val[X_ATOM]); |
||
2269 | fprintf(out, "\""); |
||
2270 | if (JT(block) == NULL) |
||
2271 | fprintf(out, ", peripheries=2"); |
||
2272 | fprintf(out, "];\n"); |
||
2273 | |||
2274 | dot_dump_node(JT(block), prog, out); |
||
2275 | dot_dump_node(JF(block), prog, out); |
||
2276 | } |
||
2277 | static void |
||
2278 | dot_dump_edge(struct block *block, FILE *out) |
||
2279 | { |
||
2280 | if (block == NULL || isMarked(block)) |
||
2281 | return; |
||
2282 | Mark(block); |
||
2283 | |||
2284 | if (JT(block)) { |
||
2285 | fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n", |
||
2286 | block->id, JT(block)->id); |
||
2287 | fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n", |
||
2288 | block->id, JF(block)->id); |
||
2289 | } |
||
2290 | dot_dump_edge(JT(block), out); |
||
2291 | dot_dump_edge(JF(block), out); |
||
2292 | } |
||
2293 | /* Output the block CFG in the graphviz/DOT language. |
||
2294 | * In the CFG, each block's code, the value index of each register at EXIT, |
||
2295 | * and the jump relationships are shown. |
||
2296 | * |
||
2297 | * example DOT for BPF `ip src host 1.1.1.1' is: |
||
2298 | digraph BPF { |
||
2299 | block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"]; |
||
2300 | block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"]; |
||
2301 | block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2]; |
||
2302 | block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2]; |
||
2303 | "block0":se -> "block1":n [label="T"]; |
||
2304 | "block0":sw -> "block3":n [label="F"]; |
||
2305 | "block1":se -> "block2":n [label="T"]; |
||
2306 | "block1":sw -> "block3":n [label="F"]; |
||
2307 | } |
||
2308 | * |
||
2309 | * After installing graphviz (http://www.graphviz.org/), save this output as bpf.dot |
||
2310 | * and run `dot -Tpng -O bpf.dot' to draw the graph. |
||
2311 | */ |
||
2312 | static void |
||
2313 | dot_dump(struct block *root) |
||
2314 | { |
||
2315 | struct bpf_program f; |
||
2316 | FILE *out = stdout; |
||
2317 | |||
2318 | memset(bids, 0, sizeof bids); |
||
2319 | f.bf_insns = icode_to_fcode(root, &f.bf_len); |
||
2320 | |||
2321 | fprintf(out, "digraph BPF {\n"); |
||
2322 | unMarkAll(); |
||
2323 | dot_dump_node(root, &f, out); |
||
2324 | unMarkAll(); |
||
2325 | dot_dump_edge(root, out); |
||
2326 | fprintf(out, "}\n"); |
||
2327 | |||
2328 | free((char *)f.bf_insns); |
||
2329 | } |
||
2330 | |||
2331 | static void |
||
2332 | plain_dump(struct block *root) |
||
2333 | { |
||
2334 | struct bpf_program f; |
||
2335 | |||
2336 | memset(bids, 0, sizeof bids); |
||
2337 | f.bf_insns = icode_to_fcode(root, &f.bf_len); |
||
2338 | bpf_dump(&f, 1); |
||
2339 | putchar('\n'); |
||
2340 | free((char *)f.bf_insns); |
||
2341 | } |
||
2342 | static void |
||
2343 | opt_dump(struct block *root) |
||
2344 | { |
||
2345 | /* if optimizer debugging is enabled, output DOT graph |
||
2346 | * `dflag=4' is equivalent to -dddd to follow -d/-dd/-ddd |
||
2347 | * convention in tcpdump command line |
||
2348 | */ |
||
2349 | if (dflag > 3) |
||
2350 | dot_dump(root); |
||
2351 | else |
||
2352 | plain_dump(root); |
||
2353 | } |
||
2354 | |||
2355 | #endif |