OpenWrt – Blame information for rev 3
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | From: Pablo Neira Ayuso <pablo@netfilter.org> |
2 | Date: Sun, 7 Jan 2018 01:04:26 +0100 |
||
3 | Subject: [PATCH] netfilter: nf_tables: flow offload expression |
||
4 | |||
5 | Add new instruction for the nf_tables VM that allows us to specify what |
||
6 | flows are offloaded into a given flow table via name. This new |
||
7 | instruction creates the flow entry and adds it to the flow table. |
||
8 | |||
9 | Only established flows, i.e. we have seen traffic in both directions, are |
||
10 | added to the flow table. You can still decide to offload entries at a |
||
11 | later stage via packet counting or checking the ct status in case you |
||
12 | want to offload assured conntracks. |
||
13 | |||
14 | This new extension depends on the conntrack subsystem. |
||
15 | |||
16 | Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> |
||
17 | --- |
||
18 | create mode 100644 net/netfilter/nft_flow_offload.c |
||
19 | |||
20 | --- a/include/uapi/linux/netfilter/nf_tables.h |
||
21 | +++ b/include/uapi/linux/netfilter/nf_tables.h |
||
22 | @@ -957,6 +957,17 @@ enum nft_ct_attributes { |
||
23 | }; |
||
24 | #define NFTA_CT_MAX (__NFTA_CT_MAX - 1) |
||
25 | |||
26 | +/** |
||
27 | + * enum nft_offload_attributes - flow offload expression attributes |
||
28 | + * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING) |
||
29 | + */ |
||
30 | +enum nft_offload_attributes { |
||
31 | + NFTA_FLOW_UNSPEC, |
||
32 | + NFTA_FLOW_TABLE_NAME, |
||
33 | + __NFTA_FLOW_MAX, |
||
34 | +}; |
||
35 | +#define NFTA_FLOW_MAX (__NFTA_FLOW_MAX - 1) |
||
36 | + |
||
37 | enum nft_limit_type { |
||
38 | NFT_LIMIT_PKTS, |
||
39 | NFT_LIMIT_PKT_BYTES |
||
40 | --- a/net/netfilter/Kconfig |
||
41 | +++ b/net/netfilter/Kconfig |
||
3 | office | 42 | @@ -509,6 +509,13 @@ config NFT_CT |
1 | office | 43 | This option adds the "ct" expression that you can use to match |
44 | connection tracking information such as the flow state. |
||
45 | |||
46 | +config NFT_FLOW_OFFLOAD |
||
47 | + depends on NF_CONNTRACK |
||
48 | + tristate "Netfilter nf_tables hardware flow offload module" |
||
49 | + help |
||
50 | + This option adds the "flow_offload" expression that you can use to |
||
51 | + choose what flows are placed into the hardware. |
||
52 | + |
||
53 | config NFT_SET_RBTREE |
||
54 | tristate "Netfilter nf_tables rbtree set module" |
||
55 | help |
||
56 | --- a/net/netfilter/Makefile |
||
57 | +++ b/net/netfilter/Makefile |
||
58 | @@ -87,6 +87,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o |
||
59 | obj-$(CONFIG_NFT_RT) += nft_rt.o |
||
60 | obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o |
||
61 | obj-$(CONFIG_NFT_CT) += nft_ct.o |
||
62 | +obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o |
||
63 | obj-$(CONFIG_NFT_LIMIT) += nft_limit.o |
||
64 | obj-$(CONFIG_NFT_NAT) += nft_nat.o |
||
65 | obj-$(CONFIG_NFT_OBJREF) += nft_objref.o |
||
66 | --- /dev/null |
||
67 | +++ b/net/netfilter/nft_flow_offload.c |
||
68 | @@ -0,0 +1,264 @@ |
||
69 | +#include <linux/kernel.h> |
||
70 | +#include <linux/module.h> |
||
71 | +#include <linux/init.h> |
||
72 | +#include <linux/netlink.h> |
||
73 | +#include <linux/netfilter.h> |
||
74 | +#include <linux/workqueue.h> |
||
75 | +#include <linux/spinlock.h> |
||
76 | +#include <linux/netfilter/nf_tables.h> |
||
77 | +#include <net/ip.h> /* for ipv4 options. */ |
||
78 | +#include <net/netfilter/nf_tables.h> |
||
79 | +#include <net/netfilter/nf_tables_core.h> |
||
80 | +#include <net/netfilter/nf_conntrack_core.h> |
||
81 | +#include <linux/netfilter/nf_conntrack_common.h> |
||
82 | +#include <net/netfilter/nf_flow_table.h> |
||
83 | + |
||
84 | +struct nft_flow_offload { |
||
85 | + struct nft_flowtable *flowtable; |
||
86 | +}; |
||
87 | + |
||
88 | +static int nft_flow_route(const struct nft_pktinfo *pkt, |
||
89 | + const struct nf_conn *ct, |
||
90 | + struct nf_flow_route *route, |
||
91 | + enum ip_conntrack_dir dir) |
||
92 | +{ |
||
93 | + struct dst_entry *this_dst = skb_dst(pkt->skb); |
||
94 | + struct dst_entry *other_dst = NULL; |
||
95 | + struct flowi fl; |
||
96 | + |
||
97 | + memset(&fl, 0, sizeof(fl)); |
||
98 | + switch (nft_pf(pkt)) { |
||
99 | + case NFPROTO_IPV4: |
||
100 | + fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; |
||
101 | + break; |
||
102 | + case NFPROTO_IPV6: |
||
103 | + fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6; |
||
104 | + break; |
||
105 | + } |
||
106 | + |
||
107 | + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); |
||
108 | + if (!other_dst) |
||
109 | + return -ENOENT; |
||
110 | + |
||
111 | + route->tuple[dir].dst = this_dst; |
||
112 | + route->tuple[dir].ifindex = nft_in(pkt)->ifindex; |
||
113 | + route->tuple[!dir].dst = other_dst; |
||
114 | + route->tuple[!dir].ifindex = nft_out(pkt)->ifindex; |
||
115 | + |
||
116 | + return 0; |
||
117 | +} |
||
118 | + |
||
119 | +static bool nft_flow_offload_skip(struct sk_buff *skb) |
||
120 | +{ |
||
121 | + struct ip_options *opt = &(IPCB(skb)->opt); |
||
122 | + |
||
123 | + if (unlikely(opt->optlen)) |
||
124 | + return true; |
||
125 | + if (skb_sec_path(skb)) |
||
126 | + return true; |
||
127 | + |
||
128 | + return false; |
||
129 | +} |
||
130 | + |
||
131 | +static void nft_flow_offload_eval(const struct nft_expr *expr, |
||
132 | + struct nft_regs *regs, |
||
133 | + const struct nft_pktinfo *pkt) |
||
134 | +{ |
||
135 | + struct nft_flow_offload *priv = nft_expr_priv(expr); |
||
136 | + struct nf_flowtable *flowtable = &priv->flowtable->data; |
||
137 | + enum ip_conntrack_info ctinfo; |
||
138 | + struct nf_flow_route route; |
||
139 | + struct flow_offload *flow; |
||
140 | + enum ip_conntrack_dir dir; |
||
141 | + struct nf_conn *ct; |
||
142 | + int ret; |
||
143 | + |
||
144 | + if (nft_flow_offload_skip(pkt->skb)) |
||
145 | + goto out; |
||
146 | + |
||
147 | + ct = nf_ct_get(pkt->skb, &ctinfo); |
||
148 | + if (!ct) |
||
149 | + goto out; |
||
150 | + |
||
151 | + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { |
||
152 | + case IPPROTO_TCP: |
||
153 | + case IPPROTO_UDP: |
||
154 | + break; |
||
155 | + default: |
||
156 | + goto out; |
||
157 | + } |
||
158 | + |
||
159 | + if (test_bit(IPS_HELPER_BIT, &ct->status)) |
||
160 | + goto out; |
||
161 | + |
||
162 | + if (ctinfo == IP_CT_NEW || |
||
163 | + ctinfo == IP_CT_RELATED) |
||
164 | + goto out; |
||
165 | + |
||
166 | + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) |
||
167 | + goto out; |
||
168 | + |
||
169 | + dir = CTINFO2DIR(ctinfo); |
||
170 | + if (nft_flow_route(pkt, ct, &route, dir) < 0) |
||
171 | + goto err_flow_route; |
||
172 | + |
||
173 | + flow = flow_offload_alloc(ct, &route); |
||
174 | + if (!flow) |
||
175 | + goto err_flow_alloc; |
||
176 | + |
||
177 | + ret = flow_offload_add(flowtable, flow); |
||
178 | + if (ret < 0) |
||
179 | + goto err_flow_add; |
||
180 | + |
||
181 | + return; |
||
182 | + |
||
183 | +err_flow_add: |
||
184 | + flow_offload_free(flow); |
||
185 | +err_flow_alloc: |
||
186 | + dst_release(route.tuple[!dir].dst); |
||
187 | +err_flow_route: |
||
188 | + clear_bit(IPS_OFFLOAD_BIT, &ct->status); |
||
189 | +out: |
||
190 | + regs->verdict.code = NFT_BREAK; |
||
191 | +} |
||
192 | + |
||
193 | +static int nft_flow_offload_validate(const struct nft_ctx *ctx, |
||
194 | + const struct nft_expr *expr, |
||
195 | + const struct nft_data **data) |
||
196 | +{ |
||
197 | + unsigned int hook_mask = (1 << NF_INET_FORWARD); |
||
198 | + |
||
199 | + return nft_chain_validate_hooks(ctx->chain, hook_mask); |
||
200 | +} |
||
201 | + |
||
202 | +static int nft_flow_offload_init(const struct nft_ctx *ctx, |
||
203 | + const struct nft_expr *expr, |
||
204 | + const struct nlattr * const tb[]) |
||
205 | +{ |
||
206 | + struct nft_flow_offload *priv = nft_expr_priv(expr); |
||
207 | + u8 genmask = nft_genmask_next(ctx->net); |
||
208 | + struct nft_flowtable *flowtable; |
||
209 | + |
||
210 | + if (!tb[NFTA_FLOW_TABLE_NAME]) |
||
211 | + return -EINVAL; |
||
212 | + |
||
213 | + flowtable = nf_tables_flowtable_lookup(ctx->table, |
||
214 | + tb[NFTA_FLOW_TABLE_NAME], |
||
215 | + genmask); |
||
216 | + if (IS_ERR(flowtable)) |
||
217 | + return PTR_ERR(flowtable); |
||
218 | + |
||
219 | + priv->flowtable = flowtable; |
||
220 | + flowtable->use++; |
||
221 | + |
||
222 | + return nf_ct_netns_get(ctx->net, ctx->afi->family); |
||
223 | +} |
||
224 | + |
||
225 | +static void nft_flow_offload_destroy(const struct nft_ctx *ctx, |
||
226 | + const struct nft_expr *expr) |
||
227 | +{ |
||
228 | + struct nft_flow_offload *priv = nft_expr_priv(expr); |
||
229 | + |
||
230 | + priv->flowtable->use--; |
||
231 | + nf_ct_netns_put(ctx->net, ctx->afi->family); |
||
232 | +} |
||
233 | + |
||
234 | +static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) |
||
235 | +{ |
||
236 | + struct nft_flow_offload *priv = nft_expr_priv(expr); |
||
237 | + |
||
238 | + if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name)) |
||
239 | + goto nla_put_failure; |
||
240 | + |
||
241 | + return 0; |
||
242 | + |
||
243 | +nla_put_failure: |
||
244 | + return -1; |
||
245 | +} |
||
246 | + |
||
247 | +static struct nft_expr_type nft_flow_offload_type; |
||
248 | +static const struct nft_expr_ops nft_flow_offload_ops = { |
||
249 | + .type = &nft_flow_offload_type, |
||
250 | + .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)), |
||
251 | + .eval = nft_flow_offload_eval, |
||
252 | + .init = nft_flow_offload_init, |
||
253 | + .destroy = nft_flow_offload_destroy, |
||
254 | + .validate = nft_flow_offload_validate, |
||
255 | + .dump = nft_flow_offload_dump, |
||
256 | +}; |
||
257 | + |
||
258 | +static struct nft_expr_type nft_flow_offload_type __read_mostly = { |
||
259 | + .name = "flow_offload", |
||
260 | + .ops = &nft_flow_offload_ops, |
||
261 | + .maxattr = NFTA_FLOW_MAX, |
||
262 | + .owner = THIS_MODULE, |
||
263 | +}; |
||
264 | + |
||
265 | +static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data) |
||
266 | +{ |
||
267 | + struct net_device *dev = data; |
||
268 | + |
||
269 | + if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) |
||
270 | + return; |
||
271 | + |
||
272 | + flow_offload_dead(flow); |
||
273 | +} |
||
274 | + |
||
275 | +static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable, |
||
276 | + void *data) |
||
277 | +{ |
||
278 | + nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data); |
||
279 | +} |
||
280 | + |
||
281 | +static int flow_offload_netdev_event(struct notifier_block *this, |
||
282 | + unsigned long event, void *ptr) |
||
283 | +{ |
||
284 | + struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
||
285 | + |
||
286 | + if (event != NETDEV_DOWN) |
||
287 | + return NOTIFY_DONE; |
||
288 | + |
||
289 | + nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev); |
||
290 | + |
||
291 | + return NOTIFY_DONE; |
||
292 | +} |
||
293 | + |
||
294 | +static struct notifier_block flow_offload_netdev_notifier = { |
||
295 | + .notifier_call = flow_offload_netdev_event, |
||
296 | +}; |
||
297 | + |
||
298 | +static int __init nft_flow_offload_module_init(void) |
||
299 | +{ |
||
300 | + int err; |
||
301 | + |
||
302 | + register_netdevice_notifier(&flow_offload_netdev_notifier); |
||
303 | + |
||
304 | + err = nft_register_expr(&nft_flow_offload_type); |
||
305 | + if (err < 0) |
||
306 | + goto register_expr; |
||
307 | + |
||
308 | + return 0; |
||
309 | + |
||
310 | +register_expr: |
||
311 | + unregister_netdevice_notifier(&flow_offload_netdev_notifier); |
||
312 | + return err; |
||
313 | +} |
||
314 | + |
||
315 | +static void __exit nft_flow_offload_module_exit(void) |
||
316 | +{ |
||
317 | + struct net *net; |
||
318 | + |
||
319 | + nft_unregister_expr(&nft_flow_offload_type); |
||
320 | + unregister_netdevice_notifier(&flow_offload_netdev_notifier); |
||
321 | + rtnl_lock(); |
||
322 | + for_each_net(net) |
||
323 | + nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL); |
||
324 | + rtnl_unlock(); |
||
325 | +} |
||
326 | + |
||
327 | +module_init(nft_flow_offload_module_init); |
||
328 | +module_exit(nft_flow_offload_module_exit); |
||
329 | + |
||
330 | +MODULE_LICENSE("GPL"); |
||
331 | +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); |
||
332 | +MODULE_ALIAS_NFT_EXPR("flow_offload"); |