OpenWrt – Blame information for rev 3
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | From: Pablo Neira Ayuso <pablo@netfilter.org> |
2 | Date: Sun, 7 Jan 2018 01:03:56 +0100 |
||
3 | Subject: [PATCH] netfilter: nf_conntrack: add IPS_OFFLOAD status bit |
||
4 | |||
5 | This new bit tells us that the conntrack entry is owned by the flow |
||
6 | table offload infrastructure. |
||
7 | |||
8 | # cat /proc/net/nf_conntrack |
||
9 | ipv4 2 tcp 6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2 |
||
10 | |||
11 | Note the [OFFLOAD] tag in the listing. |
||
12 | |||
13 | The timer of such conntrack entries looks as if it were stopped from userspace. |
||
14 | In practice, to make sure the conntrack entry does not go away, the |
||
15 | conntrack timer is periodically set to an arbitrary large value that |
||
16 | gets refreshed on every iteration from the garbage collector, so it |
||
17 | never expires, and they display no internal state in the case of TCP |
||
18 | flows. This allows us to save a bitcheck from the packet path via |
||
19 | nf_ct_is_expired(). |
||
20 | |||
21 | Conntrack entries that have been offloaded to the flow table |
||
22 | infrastructure cannot be deleted/flushed via ctnetlink. The flow table |
||
23 | infrastructure is also responsible for releasing this conntrack entry. |
||
24 | |||
25 | Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> |
||
26 | --- |
||
27 | |||
28 | --- a/include/uapi/linux/netfilter/nf_conntrack_common.h |
||
29 | +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h |
||
30 | @@ -101,12 +101,16 @@ enum ip_conntrack_status { |
||
31 | IPS_HELPER_BIT = 13, |
||
32 | IPS_HELPER = (1 << IPS_HELPER_BIT), |
||
33 | |||
34 | + /* Conntrack has been offloaded to flow table. */ |
||
35 | + IPS_OFFLOAD_BIT = 14, |
||
36 | + IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT), |
||
37 | + |
||
38 | /* Be careful here, modifying these bits can make things messy, |
||
39 | * so don't let users modify them directly. |
||
40 | */ |
||
41 | IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | |
||
42 | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | |
||
43 | - IPS_SEQ_ADJUST | IPS_TEMPLATE), |
||
44 | + IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), |
||
45 | |||
46 | __IPS_MAX_BIT = 14, |
||
47 | }; |
||
48 | --- a/net/netfilter/nf_conntrack_core.c |
||
49 | +++ b/net/netfilter/nf_conntrack_core.c |
||
50 | @@ -901,6 +901,9 @@ static unsigned int early_drop_list(stru |
||
51 | hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { |
||
52 | tmp = nf_ct_tuplehash_to_ctrack(h); |
||
53 | |||
54 | + if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) |
||
55 | + continue; |
||
56 | + |
||
57 | if (nf_ct_is_expired(tmp)) { |
||
58 | nf_ct_gc_expired(tmp); |
||
59 | continue; |
||
3 | office | 60 | @@ -975,6 +978,18 @@ static bool gc_worker_can_early_drop(con |
1 | office | 61 | return false; |
62 | } |
||
63 | |||
64 | +#define DAY (86400 * HZ) |
||
65 | + |
||
66 | +/* Set an arbitrary timeout large enough not to ever expire, this saves |
||
67 | + * us a check for the IPS_OFFLOAD_BIT from the packet path via |
||
68 | + * nf_ct_is_expired(). |
||
69 | + */ |
||
70 | +static void nf_ct_offload_timeout(struct nf_conn *ct) |
||
71 | +{ |
||
72 | + if (nf_ct_expires(ct) < DAY / 2) |
||
73 | + ct->timeout = nfct_time_stamp + DAY; |
||
74 | +} |
||
75 | + |
||
76 | static void gc_worker(struct work_struct *work) |
||
77 | { |
||
78 | unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); |
||
3 | office | 79 | @@ -1011,6 +1026,11 @@ static void gc_worker(struct work_struct |
1 | office | 80 | tmp = nf_ct_tuplehash_to_ctrack(h); |
81 | |||
82 | scanned++; |
||
83 | + if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { |
||
84 | + nf_ct_offload_timeout(tmp); |
||
85 | + continue; |
||
86 | + } |
||
87 | + |
||
88 | if (nf_ct_is_expired(tmp)) { |
||
89 | nf_ct_gc_expired(tmp); |
||
90 | expired_count++; |
||
91 | --- a/net/netfilter/nf_conntrack_netlink.c |
||
92 | +++ b/net/netfilter/nf_conntrack_netlink.c |
||
3 | office | 93 | @@ -1105,6 +1105,14 @@ static const struct nla_policy ct_nla_po |
1 | office | 94 | .len = NF_CT_LABELS_MAX_SIZE }, |
95 | }; |
||
96 | |||
97 | +static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data) |
||
98 | +{ |
||
99 | + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) |
||
100 | + return 0; |
||
101 | + |
||
102 | + return ctnetlink_filter_match(ct, data); |
||
103 | +} |
||
104 | + |
||
105 | static int ctnetlink_flush_conntrack(struct net *net, |
||
106 | const struct nlattr * const cda[], |
||
107 | u32 portid, int report) |
||
3 | office | 108 | @@ -1117,7 +1125,7 @@ static int ctnetlink_flush_conntrack(str |
1 | office | 109 | return PTR_ERR(filter); |
110 | } |
||
111 | |||
112 | - nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter, |
||
113 | + nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter, |
||
114 | portid, report); |
||
115 | kfree(filter); |
||
116 | |||
3 | office | 117 | @@ -1163,6 +1171,11 @@ static int ctnetlink_del_conntrack(struc |
1 | office | 118 | |
119 | ct = nf_ct_tuplehash_to_ctrack(h); |
||
120 | |||
121 | + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) { |
||
122 | + nf_ct_put(ct); |
||
123 | + return -EBUSY; |
||
124 | + } |
||
125 | + |
||
126 | if (cda[CTA_ID]) { |
||
127 | u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); |
||
128 | if (id != (u32)(unsigned long)ct) { |
||
129 | --- a/net/netfilter/nf_conntrack_proto_tcp.c |
||
130 | +++ b/net/netfilter/nf_conntrack_proto_tcp.c |
||
131 | @@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_c |
||
132 | /* Print out the private part of the conntrack. */ |
||
133 | static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) |
||
134 | { |
||
135 | + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) |
||
136 | + return; |
||
137 | + |
||
138 | seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); |
||
139 | } |
||
140 | #endif |
||
141 | --- a/net/netfilter/nf_conntrack_standalone.c |
||
142 | +++ b/net/netfilter/nf_conntrack_standalone.c |
||
143 | @@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file * |
||
144 | WARN_ON(!l4proto); |
||
145 | |||
146 | ret = -ENOSPC; |
||
147 | - seq_printf(s, "%-8s %u %-8s %u %ld ", |
||
148 | + seq_printf(s, "%-8s %u %-8s %u ", |
||
149 | l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), |
||
150 | - l4proto_name(l4proto->l4proto), nf_ct_protonum(ct), |
||
151 | - nf_ct_expires(ct) / HZ); |
||
152 | + l4proto_name(l4proto->l4proto), nf_ct_protonum(ct)); |
||
153 | + |
||
154 | + if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) |
||
155 | + seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ); |
||
156 | |||
157 | if (l4proto->print_conntrack) |
||
158 | l4proto->print_conntrack(s, ct); |
||
159 | @@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file * |
||
160 | if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) |
||
161 | goto release; |
||
162 | |||
163 | - if (test_bit(IPS_ASSURED_BIT, &ct->status)) |
||
164 | + if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) |
||
165 | + seq_puts(s, "[OFFLOAD] "); |
||
166 | + else if (test_bit(IPS_ASSURED_BIT, &ct->status)) |
||
167 | seq_puts(s, "[ASSURED] "); |
||
168 | |||
169 | if (seq_has_overflowed(s)) |