OpenWrt – Blame information for rev 2
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | From: Pablo Neira Ayuso <pablo@netfilter.org> |
2 | Date: Sun, 7 Jan 2018 01:04:15 +0100 |
||
3 | Subject: [PATCH] netfilter: flow table support for IPv4 |
||
4 | |||
5 | This patch adds the IPv4 flow table type, that implements the datapath |
||
6 | flow table to forward IPv4 traffic. Rationale is: |
||
7 | |||
8 | 1) Look up the packet in the flow table, from the ingress hook. |
||
9 | 2) If there's a hit, decrement ttl and pass it on to the neighbour layer |
||
10 | for transmission. |
||
11 | 3) If there's a miss, packet is passed up to the classic forwarding |
||
12 | path. |
||
13 | |||
14 | This patch also supports layer 3 source and destination NAT. |
||
15 | |||
16 | Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> |
||
17 | --- |
||
18 | create mode 100644 net/ipv4/netfilter/nf_flow_table_ipv4.c |
||
19 | |||
20 | --- a/net/ipv4/netfilter/Kconfig |
||
21 | +++ b/net/ipv4/netfilter/Kconfig |
||
22 | @@ -78,6 +78,14 @@ config NF_TABLES_ARP |
||
23 | |||
24 | endif # NF_TABLES |
||
25 | |||
26 | +config NF_FLOW_TABLE_IPV4 |
||
27 | + select NF_FLOW_TABLE |
||
28 | + tristate "Netfilter flow table IPv4 module" |
||
29 | + help |
||
30 | + This option adds the flow table IPv4 support. |
||
31 | + |
||
32 | + To compile it as a module, choose M here. |
||
33 | + |
||
34 | config NF_DUP_IPV4 |
||
35 | tristate "Netfilter IPv4 packet duplication to alternate destination" |
||
36 | depends on !NF_CONNTRACK || NF_CONNTRACK |
||
37 | --- a/net/ipv4/netfilter/Makefile |
||
38 | +++ b/net/ipv4/netfilter/Makefile |
||
39 | @@ -43,6 +43,9 @@ obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redi |
||
40 | obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o |
||
41 | obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o |
||
42 | |||
43 | +# flow table support |
||
44 | +obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o |
||
45 | + |
||
46 | # generic IP tables |
||
47 | obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o |
||
48 | |||
49 | --- /dev/null |
||
50 | +++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c |
||
51 | @@ -0,0 +1,283 @@ |
||
52 | +#include <linux/kernel.h> |
||
53 | +#include <linux/init.h> |
||
54 | +#include <linux/module.h> |
||
55 | +#include <linux/netfilter.h> |
||
56 | +#include <linux/rhashtable.h> |
||
57 | +#include <linux/ip.h> |
||
58 | +#include <linux/netdevice.h> |
||
59 | +#include <net/ip.h> |
||
60 | +#include <net/neighbour.h> |
||
61 | +#include <net/netfilter/nf_flow_table.h> |
||
62 | +#include <net/netfilter/nf_tables.h> |
||
63 | +/* For layer 4 checksum field offset. */ |
||
64 | +#include <linux/tcp.h> |
||
65 | +#include <linux/udp.h> |
||
66 | + |
||
67 | +static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, |
||
68 | + __be32 addr, __be32 new_addr) |
||
69 | +{ |
||
70 | + struct tcphdr *tcph; |
||
71 | + |
||
72 | + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || |
||
73 | + skb_try_make_writable(skb, thoff + sizeof(*tcph))) |
||
74 | + return -1; |
||
75 | + |
||
76 | + tcph = (void *)(skb_network_header(skb) + thoff); |
||
77 | + inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); |
||
78 | + |
||
79 | + return 0; |
||
80 | +} |
||
81 | + |
||
82 | +static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, |
||
83 | + __be32 addr, __be32 new_addr) |
||
84 | +{ |
||
85 | + struct udphdr *udph; |
||
86 | + |
||
87 | + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || |
||
88 | + skb_try_make_writable(skb, thoff + sizeof(*udph))) |
||
89 | + return -1; |
||
90 | + |
||
91 | + udph = (void *)(skb_network_header(skb) + thoff); |
||
92 | + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { |
||
93 | + inet_proto_csum_replace4(&udph->check, skb, addr, |
||
94 | + new_addr, true); |
||
95 | + if (!udph->check) |
||
96 | + udph->check = CSUM_MANGLED_0; |
||
97 | + } |
||
98 | + |
||
99 | + return 0; |
||
100 | +} |
||
101 | + |
||
102 | +static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, |
||
103 | + unsigned int thoff, __be32 addr, |
||
104 | + __be32 new_addr) |
||
105 | +{ |
||
106 | + switch (iph->protocol) { |
||
107 | + case IPPROTO_TCP: |
||
108 | + if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) |
||
109 | + return NF_DROP; |
||
110 | + break; |
||
111 | + case IPPROTO_UDP: |
||
112 | + if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) |
||
113 | + return NF_DROP; |
||
114 | + break; |
||
115 | + } |
||
116 | + |
||
117 | + return 0; |
||
118 | +} |
||
119 | + |
||
120 | +static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
||
121 | + struct iphdr *iph, unsigned int thoff, |
||
122 | + enum flow_offload_tuple_dir dir) |
||
123 | +{ |
||
124 | + __be32 addr, new_addr; |
||
125 | + |
||
126 | + switch (dir) { |
||
127 | + case FLOW_OFFLOAD_DIR_ORIGINAL: |
||
128 | + addr = iph->saddr; |
||
129 | + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr; |
||
130 | + iph->saddr = new_addr; |
||
131 | + break; |
||
132 | + case FLOW_OFFLOAD_DIR_REPLY: |
||
133 | + addr = iph->daddr; |
||
134 | + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; |
||
135 | + iph->daddr = new_addr; |
||
136 | + break; |
||
137 | + default: |
||
138 | + return -1; |
||
139 | + } |
||
140 | + csum_replace4(&iph->check, addr, new_addr); |
||
141 | + |
||
142 | + return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); |
||
143 | +} |
||
144 | + |
||
145 | +static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
||
146 | + struct iphdr *iph, unsigned int thoff, |
||
147 | + enum flow_offload_tuple_dir dir) |
||
148 | +{ |
||
149 | + __be32 addr, new_addr; |
||
150 | + |
||
151 | + switch (dir) { |
||
152 | + case FLOW_OFFLOAD_DIR_ORIGINAL: |
||
153 | + addr = iph->daddr; |
||
154 | + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr; |
||
155 | + iph->daddr = new_addr; |
||
156 | + break; |
||
157 | + case FLOW_OFFLOAD_DIR_REPLY: |
||
158 | + addr = iph->saddr; |
||
159 | + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; |
||
160 | + iph->saddr = new_addr; |
||
161 | + break; |
||
162 | + default: |
||
163 | + return -1; |
||
164 | + } |
||
165 | + |
||
166 | + return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); |
||
167 | +} |
||
168 | + |
||
169 | +static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, |
||
170 | + enum flow_offload_tuple_dir dir) |
||
171 | +{ |
||
172 | + struct iphdr *iph = ip_hdr(skb); |
||
173 | + unsigned int thoff = iph->ihl * 4; |
||
174 | + |
||
175 | + if (flow->flags & FLOW_OFFLOAD_SNAT && |
||
176 | + (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || |
||
177 | + nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) |
||
178 | + return -1; |
||
179 | + if (flow->flags & FLOW_OFFLOAD_DNAT && |
||
180 | + (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || |
||
181 | + nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) |
||
182 | + return -1; |
||
183 | + |
||
184 | + return 0; |
||
185 | +} |
||
186 | + |
||
187 | +static bool ip_has_options(unsigned int thoff) |
||
188 | +{ |
||
189 | + return thoff != sizeof(struct iphdr); |
||
190 | +} |
||
191 | + |
||
192 | +static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, |
||
193 | + struct flow_offload_tuple *tuple) |
||
194 | +{ |
||
195 | + struct flow_ports *ports; |
||
196 | + unsigned int thoff; |
||
197 | + struct iphdr *iph; |
||
198 | + |
||
199 | + if (!pskb_may_pull(skb, sizeof(*iph))) |
||
200 | + return -1; |
||
201 | + |
||
202 | + iph = ip_hdr(skb); |
||
203 | + thoff = iph->ihl * 4; |
||
204 | + |
||
205 | + if (ip_is_fragment(iph) || |
||
206 | + unlikely(ip_has_options(thoff))) |
||
207 | + return -1; |
||
208 | + |
||
209 | + if (iph->protocol != IPPROTO_TCP && |
||
210 | + iph->protocol != IPPROTO_UDP) |
||
211 | + return -1; |
||
212 | + |
||
213 | + thoff = iph->ihl * 4; |
||
214 | + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) |
||
215 | + return -1; |
||
216 | + |
||
217 | + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); |
||
218 | + |
||
219 | + tuple->src_v4.s_addr = iph->saddr; |
||
220 | + tuple->dst_v4.s_addr = iph->daddr; |
||
221 | + tuple->src_port = ports->source; |
||
222 | + tuple->dst_port = ports->dest; |
||
223 | + tuple->l3proto = AF_INET; |
||
224 | + tuple->l4proto = iph->protocol; |
||
225 | + tuple->iifidx = dev->ifindex; |
||
226 | + |
||
227 | + return 0; |
||
228 | +} |
||
229 | + |
||
230 | +/* Based on ip_exceeds_mtu(). */ |
||
231 | +static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) |
||
232 | +{ |
||
233 | + if (skb->len <= mtu) |
||
234 | + return false; |
||
235 | + |
||
236 | + if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) |
||
237 | + return false; |
||
238 | + |
||
239 | + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) |
||
240 | + return false; |
||
241 | + |
||
242 | + return true; |
||
243 | +} |
||
244 | + |
||
245 | +static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt) |
||
246 | +{ |
||
247 | + u32 mtu; |
||
248 | + |
||
249 | + mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); |
||
250 | + if (__nf_flow_exceeds_mtu(skb, mtu)) |
||
251 | + return true; |
||
252 | + |
||
253 | + return false; |
||
254 | +} |
||
255 | + |
||
256 | +static unsigned int |
||
257 | +nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, |
||
258 | + const struct nf_hook_state *state) |
||
259 | +{ |
||
260 | + struct flow_offload_tuple_rhash *tuplehash; |
||
261 | + struct nf_flowtable *flow_table = priv; |
||
262 | + struct flow_offload_tuple tuple = {}; |
||
263 | + enum flow_offload_tuple_dir dir; |
||
264 | + struct flow_offload *flow; |
||
265 | + struct net_device *outdev; |
||
266 | + const struct rtable *rt; |
||
267 | + struct iphdr *iph; |
||
268 | + __be32 nexthop; |
||
269 | + |
||
270 | + if (skb->protocol != htons(ETH_P_IP)) |
||
271 | + return NF_ACCEPT; |
||
272 | + |
||
273 | + if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) |
||
274 | + return NF_ACCEPT; |
||
275 | + |
||
276 | + tuplehash = flow_offload_lookup(flow_table, &tuple); |
||
277 | + if (tuplehash == NULL) |
||
278 | + return NF_ACCEPT; |
||
279 | + |
||
280 | + outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); |
||
281 | + if (!outdev) |
||
282 | + return NF_ACCEPT; |
||
283 | + |
||
284 | + dir = tuplehash->tuple.dir; |
||
285 | + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); |
||
286 | + |
||
287 | + rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; |
||
288 | + if (unlikely(nf_flow_exceeds_mtu(skb, rt))) |
||
289 | + return NF_ACCEPT; |
||
290 | + |
||
291 | + if (skb_try_make_writable(skb, sizeof(*iph))) |
||
292 | + return NF_DROP; |
||
293 | + |
||
294 | + if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && |
||
295 | + nf_flow_nat_ip(flow, skb, dir) < 0) |
||
296 | + return NF_DROP; |
||
297 | + |
||
298 | + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; |
||
299 | + iph = ip_hdr(skb); |
||
300 | + ip_decrease_ttl(iph); |
||
301 | + |
||
302 | + skb->dev = outdev; |
||
303 | + nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); |
||
304 | + neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); |
||
305 | + |
||
306 | + return NF_STOLEN; |
||
307 | +} |
||
308 | + |
||
309 | +static struct nf_flowtable_type flowtable_ipv4 = { |
||
310 | + .family = NFPROTO_IPV4, |
||
311 | + .params = &nf_flow_offload_rhash_params, |
||
312 | + .gc = nf_flow_offload_work_gc, |
||
313 | + .hook = nf_flow_offload_ip_hook, |
||
314 | + .owner = THIS_MODULE, |
||
315 | +}; |
||
316 | + |
||
317 | +static int __init nf_flow_ipv4_module_init(void) |
||
318 | +{ |
||
319 | + nft_register_flowtable_type(&flowtable_ipv4); |
||
320 | + |
||
321 | + return 0; |
||
322 | +} |
||
323 | + |
||
324 | +static void __exit nf_flow_ipv4_module_exit(void) |
||
325 | +{ |
||
326 | + nft_unregister_flowtable_type(&flowtable_ipv4); |
||
327 | +} |
||
328 | + |
||
329 | +module_init(nf_flow_ipv4_module_init); |
||
330 | +module_exit(nf_flow_ipv4_module_exit); |
||
331 | + |
||
332 | +MODULE_LICENSE("GPL"); |
||
333 | +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); |
||
334 | +MODULE_ALIAS_NF_FLOWTABLE(AF_INET); |