WebSVN – OpenWrt – Blame – Rev 1 – /trunk/target/linux/generic/pending-3.18/080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch

1

office

1

From: Alexander Duyck <alexander.h.duyck@redhat.com>

2

Date: Wed, 31 Dec 2014 10:56:12 -0800

3

Subject: [PATCH] fib_trie: Update meaning of pos to represent unchecked

4

bits

5

6

This change moves the pos value to the other side of the "bits" field. By

7

doing this it actually simplifies a significant amount of code in the trie.

8

9

For example when halving a tree we know that the bit lost exists at

10

oldnode->pos, and if we inflate the tree the new bit being add is at

11

tn->pos. Previously to find those bits you would have to subtract pos and

12

bits from the keylength or start with a value of (1 << 31) and then shift

13

that.

14

15

There are a number of spots throughout the code that benefit from this. In

16

the case of the hot-path searches the main advantage is that we can drop 2

17

or more operations from the search path as we no longer need to compute the

18

value for the index to be shifted by and can instead just use the raw pos

19

value.

20

21

In addition the tkey_extract_bits is now defunct and can be replaced by

22

get_index since the two operations were doing the same thing, but now

23

get_index does it much more quickly as it is only an xor and shift versus a

24

pair of shifts and a subtraction.

25

26

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>

27

Signed-off-by: David S. Miller <davem@davemloft.net>

28

---

29

30

--- a/net/ipv4/fib_trie.c

31

+++ b/net/ipv4/fib_trie.c

32

@@ -90,8 +90,7 @@ typedef unsigned int t_key;

33

#define IS_TNODE(n) ((n)->bits)

34

#define IS_LEAF(n) (!(n)->bits)

35

36

-#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)

37

-#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))

38

+#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos)

struct tnode {

t_key key;

@@ -209,81 +208,64 @@ static inline struct tnode *tnode_get_ch

43

return rcu_dereference_rtnl(tn->child[i]);

44

}

45

46

-static inline t_key mask_pfx(t_key k, unsigned int l)

47

-{

48

- return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);

49

-}

50

-

51

-static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)

52

-{

53

- if (offset < KEYLENGTH)

54

- return ((t_key)(a << offset)) >> (KEYLENGTH - bits);

- else

- return 0;

-}

-

-/*

- To understand this stuff, an understanding of keys and all their bits is

61

- necessary. Every node in the trie has a key associated with it, but not

62

- all of the bits in that key are significant.

63

-

64

- Consider a node 'n' and its parent 'tp'.

65

-

66

- If n is a leaf, every bit in its key is significant. Its presence is

67

- necessitated by path compression, since during a tree traversal (when

68

- searching for a leaf - unless we are doing an insertion) we will completely

69

- ignore all skipped bits we encounter. Thus we need to verify, at the end of

70

- a potentially successful search, that we have indeed been walking the

71

- correct key path.

72

-

73

- Note that we can never "miss" the correct key in the tree if present by

74

- following the wrong path. Path compression ensures that segments of the key

75

- that are the same for all keys with a given prefix are skipped, but the

76

- skipped part *is* identical for each node in the subtrie below the skipped

77

- bit! trie_insert() in this implementation takes care of that - note the

78

- call to tkey_sub_equals() in trie_insert().

79

-

80

- if n is an internal node - a 'tnode' here, the various parts of its key

81

- have many different meanings.

82

-

83

- Example:

84

- _________________________________________________________________

85

- | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |

86

- -----------------------------------------------------------------

87

- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15

88

-

89

- _________________________________________________________________

90

- | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |

91

- -----------------------------------------------------------------

92

- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31

-

- tp->pos = 7

- tp->bits = 3

- n->pos = 15

- n->bits = 4

-

- First, let's just ignore the bits that come before the parent tp, that is

100

- the bits from 0 to (tp->pos-1). They are *known* but at this point we do

101

- not use them for anything.

102

-

103

- The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the

104

- index into the parent's child array. That is, they will be used to find

105

- 'n' among tp's children.

106

-

107

- The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits

108

- for the node n.

109

-

110

- All the bits we have seen so far are significant to the node n. The rest

111

- of the bits are really not needed or indeed known in n->key.

112

-

113

- The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into

114

- n's child array, and will of course be different for each child.

115

-

116

-

117

- The rest of the bits, from (n->pos + n->bits) onward, are completely unknown

- at this point.

-

-*/

+/* To understand this stuff, an understanding of keys and all their bits is

122

+ * necessary. Every node in the trie has a key associated with it, but not

123

+ * all of the bits in that key are significant.

124

+ *

125

+ * Consider a node 'n' and its parent 'tp'.

126

+ *

127

+ * If n is a leaf, every bit in its key is significant. Its presence is

128

+ * necessitated by path compression, since during a tree traversal (when

129

+ * searching for a leaf - unless we are doing an insertion) we will completely

130

+ * ignore all skipped bits we encounter. Thus we need to verify, at the end of

131

+ * a potentially successful search, that we have indeed been walking the

132

+ * correct key path.

133

+ *

134

+ * Note that we can never "miss" the correct key in the tree if present by

135

+ * following the wrong path. Path compression ensures that segments of the key

136

+ * that are the same for all keys with a given prefix are skipped, but the

137

+ * skipped part *is* identical for each node in the subtrie below the skipped

138

+ * bit! trie_insert() in this implementation takes care of that.

139

+ *

140

+ * if n is an internal node - a 'tnode' here, the various parts of its key

141

+ * have many different meanings.

142

+ *

143

+ * Example:

144

+ * _________________________________________________________________

145

+ * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |

146

+ * -----------------------------------------------------------------

147

+ * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16

148

+ *

149

+ * _________________________________________________________________

150

+ * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |

151

+ * -----------------------------------------------------------------

152

+ * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0

+ *

+ * tp->pos = 22

+ * tp->bits = 3

+ * n->pos = 13

+ * n->bits = 4

+ *

+ * First, let's just ignore the bits that come before the parent tp, that is

160

+ * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this

161

+ * point we do not use them for anything.

162

+ *

163

+ * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the

164

+ * index into the parent's child array. That is, they will be used to find

165

+ * 'n' among tp's children.

166

+ *

167

+ * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits

168

+ * for the node n.

169

+ *

170

+ * All the bits we have seen so far are significant to the node n. The rest

171

+ * of the bits are really not needed or indeed known in n->key.

172

+ *

173

+ * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into

174

+ * n's child array, and will of course be different for each child.

175

+ *

176

+ * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown

+ * at this point.

+ */

static const int halve_threshold = 25;

181

static const int inflate_threshold = 50;

182

@@ -367,7 +349,7 @@ static struct tnode *leaf_new(t_key key)

183

* as the nodes are searched

184

*/

185

l->key = key;

186

- l->pos = KEYLENGTH;

187

+ l->pos = 0;

188

/* set bits to 0 indicating we are not a tnode */

189

l->bits = 0;

190

191

@@ -400,7 +382,7 @@ static struct tnode *tnode_new(t_key key

tn->parent = NULL;

tn->pos = pos;

tn->bits = bits;

- tn->key = mask_pfx(key, pos);

196

+ tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;

197

tn->full_children = 0;

198

tn->empty_children = 1<<bits;

199

}

200

@@ -410,14 +392,12 @@ static struct tnode *tnode_new(t_key key

return tn;

}

-/*

- * Check whether a tnode 'n' is "full", i.e. it is an internal node

206

+/* Check whether a tnode 'n' is "full", i.e. it is an internal node

207

* and no bits are skipped. See discussion in dyntree paper p. 6

208

*/

209

-

210

static inline int tnode_full(const struct tnode *tn, const struct tnode *n)

211

{

212

- return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));

213

+ return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);

214

}

215

216

static inline void put_child(struct tnode *tn, int i,

217

@@ -641,11 +621,12 @@ static struct tnode *inflate(struct trie

218

{

219

int olen = tnode_child_length(oldtnode);

struct tnode *tn;

+ t_key m;

int i;

pr_debug("In inflate\n");

225

226

- tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);

227

+ tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);

228

229

if (!tn)

230

return ERR_PTR(-ENOMEM);

231

@@ -656,21 +637,18 @@ static struct tnode *inflate(struct trie

232

* fails. In case of failure we return the oldnode and inflate

233

* of tnode is ignored.

234

*/

235

+ for (i = 0, m = 1u << tn->pos; i < olen; i++) {

236

+ struct tnode *inode = tnode_get_child(oldtnode, i);

237

238

- for (i = 0; i < olen; i++) {

239

- struct tnode *inode;

240

-

241

- inode = tnode_get_child(oldtnode, i);

242

- if (tnode_full(oldtnode, inode) && inode->bits > 1) {

243

+ if (tnode_full(oldtnode, inode) && (inode->bits > 1)) {

244

struct tnode *left, *right;

245

- t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;

246

247

- left = tnode_new(inode->key&(~m), inode->pos + 1,

248

+ left = tnode_new(inode->key & ~m, inode->pos,

inode->bits - 1);

if (!left)

goto nomem;

- right = tnode_new(inode->key|m, inode->pos + 1,

254

+ right = tnode_new(inode->key | m, inode->pos,

inode->bits - 1);

if (!right) {

@@ -694,9 +672,7 @@ static struct tnode *inflate(struct trie

259

260

/* A leaf or an internal node with skipped bits */

261

if (!tnode_full(oldtnode, inode)) {

262

- put_child(tn,

263

- tkey_extract_bits(inode->key, tn->pos, tn->bits),

264

- inode);

265

+ put_child(tn, get_index(inode->key, tn), inode);

continue;

}

@@ -767,7 +743,7 @@ static struct tnode *halve(struct trie *

270

271

pr_debug("In halve\n");

272

273

- tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);

274

+ tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);

275

276

if (!tn)

277

return ERR_PTR(-ENOMEM);

278

@@ -787,7 +763,7 @@ static struct tnode *halve(struct trie *

279

if (left && right) {

280

struct tnode *newn;

281

282

- newn = tnode_new(left->key, tn->pos + tn->bits, 1);

283

+ newn = tnode_new(left->key, oldtnode->pos, 1);

if (!newn)

goto nomem;

@@ -915,7 +891,7 @@ static void trie_rebalance(struct trie *

288

key = tn->key;

289

290

while (tn != NULL && (tp = node_parent(tn)) != NULL) {

291

- cindex = tkey_extract_bits(key, tp->pos, tp->bits);

292

+ cindex = get_index(key, tp);

293

wasfull = tnode_full(tp, tnode_get_child(tp, cindex));

294

tn = resize(t, tn);

295

296

@@ -1005,11 +981,8 @@ static struct list_head *fib_insert_node

*/

if (n) {

struct tnode *tn;

- int newpos;

-

- newpos = KEYLENGTH - __fls(n->key ^ key) - 1;

303

304

- tn = tnode_new(key, newpos, 1);

305

+ tn = tnode_new(key, __fls(key ^ n->key), 1);

306

if (!tn) {

307

free_leaf_info(li);

308

node_free(l);

309

@@ -1559,12 +1532,7 @@ static int trie_flush_leaf(struct tnode

310

static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)

{

do {

- t_key idx;

-

- if (c)

- idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;

317

- else

318

- idx = 0;

319

+ t_key idx = c ? idx = get_index(c->key, p) + 1 : 0;

320

321

while (idx < 1u << p->bits) {

322

c = tnode_get_child_rcu(p, idx++);

323

@@ -1851,7 +1819,7 @@ rescan:

324

/* Current node exhausted, pop back up */

325

p = node_parent_rcu(tn);

326

if (p) {

327

- cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;

328

+ cindex = get_index(tn->key, p) + 1;

tn = p;

--iter->depth;

goto rescan;

@@ -2186,10 +2154,10 @@ static int fib_trie_seq_show(struct seq_

333

if (IS_TNODE(n)) {

334

__be32 prf = htonl(n->key);

335

336

- seq_indent(seq, iter->depth - 1);

337

- seq_printf(seq, " +-- %pI4/%d %d %d %d\n",

338

- &prf, n->pos, n->bits, n->full_children,

339

- n->empty_children);

340

+ seq_indent(seq, iter->depth-1);

341

+ seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",

342

+ &prf, KEYLENGTH - n->pos - n->bits, n->bits,

343

+ n->full_children, n->empty_children);

344

} else {

345

struct leaf_info *li;

346

__be32 val = htonl(n->key);

OpenWrt – Blame information for rev 1