nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /*--------------------------------------------------------------- |
2 | * Copyright (c) 1999,2000,2001,2002,2003 |
||
3 | * The Board of Trustees of the University of Illinois |
||
4 | * All Rights Reserved. |
||
5 | *--------------------------------------------------------------- |
||
6 | * Permission is hereby granted, free of charge, to any person |
||
7 | * obtaining a copy of this software (Iperf) and associated |
||
8 | * documentation files (the "Software"), to deal in the Software |
||
9 | * without restriction, including without limitation the |
||
10 | * rights to use, copy, modify, merge, publish, distribute, |
||
11 | * sublicense, and/or sell copies of the Software, and to permit |
||
12 | * persons to whom the Software is furnished to do |
||
13 | * so, subject to the following conditions: |
||
14 | * |
||
15 | * |
||
16 | * Redistributions of source code must retain the above |
||
17 | * copyright notice, this list of conditions and |
||
18 | * the following disclaimers. |
||
19 | * |
||
20 | * |
||
21 | * Redistributions in binary form must reproduce the above |
||
22 | * copyright notice, this list of conditions and the following |
||
23 | * disclaimers in the documentation and/or other materials |
||
24 | * provided with the distribution. |
||
25 | * |
||
26 | * |
||
27 | * Neither the names of the University of Illinois, NCSA, |
||
28 | * nor the names of its contributors may be used to endorse |
||
29 | * or promote products derived from this Software without |
||
30 | * specific prior written permission. |
||
31 | * |
||
32 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
33 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
||
34 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
||
35 | * NONINFRINGEMENT. IN NO EVENT SHALL THE CONTIBUTORS OR COPYRIGHT |
||
36 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
||
37 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
38 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
39 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
40 | * ________________________________________________________________ |
||
41 | * National Laboratory for Applied Network Research |
||
42 | * National Center for Supercomputing Applications |
||
43 | * University of Illinois at Urbana-Champaign |
||
44 | * http://www.ncsa.uiuc.edu |
||
45 | * ________________________________________________________________ |
||
46 | * |
||
47 | * delay.c |
||
48 | * by Mark Gates <mgates@nlanr.net> |
||
49 | * updates |
||
50 | * by Robert J. McMahon <rmcmahon@broadcom.com> <rjmcmahon@rjmcmahon.com> |
||
51 | * ------------------------------------------------------------------- |
||
52 | * attempts at accurate microsecond delays |
||
53 | * ------------------------------------------------------------------- */ |
||
54 | #include "headers.h" |
||
55 | #include "util.h" |
||
56 | #include "delay.h" |
||
57 | #include <math.h> |
||
58 | |||
59 | #define MILLION 1000000 |
||
60 | #define BILLION 1000000000 |
||
61 | |||
62 | /* ------------------------------------------------------------------- |
||
63 | * A micro-second delay function |
||
64 | * o Use a busy loop or nanosleep |
||
65 | * |
||
66 | * Some notes: |
||
67 | * o clock_gettime() (if available) is preferred over gettimeofday() |
||
68 | * as it gives nanosecond resolution and should be more efficient. |
||
69 | * It also supports CLOCK_MONOTONIC and CLOCK_MONOTONIC_RAW |
||
70 | * though CLOCK_REALTIME is being used by the code. |
||
71 | * o This code does not use Timestamp object, as the goal of these |
||
72 | * functions is accurate delays (vs accurate timestamps.) |
||
73 | * o The syscalls such as nanosleep guarantee at least the request time |
||
74 | * and can and will delay longer, particularly due to things like context |
||
75 | * switching, causing the delay to lose accuracy |
||
76 | * o Kalman filtering is used to predict delay error which in turn |
||
77 | * is used to adjust the delay, hopefully mitigating the above. |
||
78 | * Note: This can cause the delay to return faster than the request, |
||
79 | * i.e. the *at least* guarantee is not preserved for the kalman |
||
80 | * adjusted delay calls. |
||
81 | * o Remember, the Client is keeping a running average delay for the |
||
82 | * thread so errors in delay will also be adjusted there. (Assuming |
||
83 | * it's possible. It's not really possible at top line link rates |
||
84 | * because lost time can't be made up for by speeding up the transmits. |
||
85 | * Hence, don't lose time with delay calls which err on the side of |
||
86 | * taking too long. Kalman should help much here.) |
||
87 | * |
||
88 | * POSIX nanosleep(). This allows a higher timing resolution |
||
89 | * (under Linux e.g. it uses hrtimers), does not affect any signals, |
||
90 | * and will use up remaining time when interrupted. |
||
91 | * ------------------------------------------------------------------- */ |
||
92 | |||
// Delay for roughly `usec` microseconds using the mechanism chosen at
// build time, in order of preference:
//   HAVE_KALMAN    -> delay_kalman()    (error-corrected delay)
//   HAVE_NANOSLEEP -> delay_nanosleep() (suspend the thread)
//   otherwise      -> delay_busyloop()  (spin on the clock)
void delay_loop(unsigned long usec)
{
#if defined(HAVE_KALMAN)
    delay_kalman(usec);
#elif defined(HAVE_NANOSLEEP)
    delay_nanosleep(usec);
#else
    delay_busyloop(usec);
#endif
}
||
105 | |||
106 | #ifdef HAVE_NANOSLEEP |
||
107 | // Can use the nanosleep syscall suspending the thread |
||
108 | void delay_nanosleep (unsigned long usec) { |
||
109 | struct timespec requested, remaining; |
||
110 | requested.tv_sec = 0; |
||
111 | requested.tv_nsec = usec * 1000L; |
||
112 | // Note, signals will cause the nanosleep |
||
113 | // to return early. That's fine. |
||
114 | nanosleep(&requested, &remaining); |
||
115 | } |
||
116 | #endif |
||
117 | |||
118 | #if defined (HAVE_NANOSLEEP) || defined (HAVE_CLOCK_GETTIME) |
||
119 | static void timespec_add_ulong (struct timespec *tv0, unsigned long value) { |
||
120 | tv0->tv_nsec += value; |
||
121 | if (tv0->tv_nsec >= BILLION) { |
||
122 | tv0->tv_sec++; |
||
123 | tv0->tv_nsec -= BILLION; |
||
124 | } |
||
125 | } |
||
126 | #endif |
||
127 | |||
128 | #ifdef HAVE_KALMAN |
||
129 | // Kalman versions attempt to support delay request |
||
130 | // accuracy over a minimum guaranteed delay by |
||
131 | // prediciting the delay error. This is |
||
132 | // the basic recursive algorithm. |
||
133 | static void kalman_update (kalman_state *state, double measurement) { |
||
134 | //prediction update |
||
135 | state->p = state->p + state->q; |
||
136 | //measurement update |
||
137 | state->k = state->p / (state->p + state->r); |
||
138 | state->x = state->x + (state->k * (measurement - state->x)); |
||
139 | state->p = (1 - state->k) * state->p; |
||
140 | } |
||
141 | #endif |
||
142 | |||
143 | #ifdef HAVE_CLOCK_GETTIME |
||
144 | // Delay calls for systems with clock_gettime |
||
145 | // Working units are nanoseconds and structures are timespec |
||
146 | static void timespec_add_double (struct timespec *tv0, double value) { |
||
147 | tv0->tv_nsec += (unsigned long) value; |
||
148 | if (tv0->tv_nsec >= BILLION) { |
||
149 | tv0->tv_sec++; |
||
150 | tv0->tv_nsec -= BILLION; |
||
151 | } |
||
152 | } |
||
153 | // tv1 assumed greater than tv0 |
||
154 | static double timespec_diff (struct timespec tv1, struct timespec tv0) { |
||
155 | double result; |
||
156 | if (tv1.tv_nsec < tv0.tv_nsec) { |
||
157 | tv1.tv_nsec += BILLION; |
||
158 | tv1.tv_sec--; |
||
159 | } |
||
160 | result = (double) (((tv1.tv_sec - tv0.tv_sec) * BILLION) + (tv1.tv_nsec - tv0.tv_nsec)); |
||
161 | return result; |
||
162 | } |
||
163 | static void timespec_add( struct timespec *tv0, struct timespec *tv1) |
||
164 | { |
||
165 | tv0->tv_sec += tv1->tv_sec; |
||
166 | tv0->tv_nsec += tv1->tv_nsec; |
||
167 | if ( tv0->tv_nsec >= BILLION ) { |
||
168 | tv0->tv_nsec -= BILLION; |
||
169 | tv0->tv_sec++; |
||
170 | } |
||
171 | } |
||
// Return 1 when tv1 is strictly later than tv0, otherwise 0.
// Seconds decide; nanoseconds only break a tie in the seconds.
static inline
int timespec_greaterthan(struct timespec tv1, struct timespec tv0) {
    if (tv1.tv_sec != tv0.tv_sec)
        return tv1.tv_sec > tv0.tv_sec;
    return tv1.tv_nsec > tv0.tv_nsec;
}
||
181 | // A cpu busy loop for systems with clock_gettime |
||
182 | void delay_busyloop (unsigned long usec) { |
||
183 | struct timespec t1, t2; |
||
184 | clock_gettime(CLOCK_REALTIME, &t1); |
||
185 | timespec_add_ulong(&t1, (usec * 1000L)); |
||
186 | while (1) { |
||
187 | clock_gettime(CLOCK_REALTIME, &t2); |
||
188 | if (timespec_greaterthan(t2, t1)) |
||
189 | break; |
||
190 | } |
||
191 | } |
||
192 | // Kalman routines for systems with clock_gettime |
||
193 | #ifdef HAVE_KALMAN |
||
194 | // Request units is microseconds |
||
195 | // Adjust units is nanoseconds |
||
196 | void delay_kalman (unsigned long usec) { |
||
197 | struct timespec t1, t2, finishtime, requested={0,0}, remaining; |
||
198 | double nsec_adjusted, err; |
||
199 | static kalman_state kalmanerr={ |
||
200 | 0.00001, //q process noise covariance |
||
201 | 0.1, //r measurement noise covariance |
||
202 | 0.0, //x value, error predictio (units nanoseconds) |
||
203 | 1, //p estimation error covariance |
||
204 | 0.75 //k kalman gain |
||
205 | }; |
||
206 | // Get the current clock |
||
207 | clock_gettime(CLOCK_REALTIME, &t1); |
||
208 | // Perform the kalman adjust per the predicted delay error |
||
209 | nsec_adjusted = (usec * 1000.0) - kalmanerr.x; |
||
210 | // Set a timespec to be used by the nanosleep |
||
211 | // as well as for the finished time calculation |
||
212 | timespec_add_double(&requested, nsec_adjusted); |
||
213 | // Set the finish time in timespec format |
||
214 | finishtime = t1; |
||
215 | timespec_add(&finishtime, &requested); |
||
216 | # ifdef HAVE_NANOSLEEP |
||
217 | // Don't call nanosleep for values less than 10 microseconds |
||
218 | // as the syscall is too expensive. Let the busy loop |
||
219 | // provide the delay for times under that. |
||
220 | if (nsec_adjusted > 10000) { |
||
221 | nanosleep(&requested, &remaining); |
||
222 | } |
||
223 | # endif |
||
224 | while (1) { |
||
225 | clock_gettime(CLOCK_REALTIME, &t2); |
||
226 | if (timespec_greaterthan(t2, finishtime)) |
||
227 | break; |
||
228 | } |
||
229 | // Compute the delay error in units of nanoseconds |
||
230 | // and cast to type double |
||
231 | err = (double) (timespec_diff(t2, t1) - (usec * 1000)); |
||
232 | // printf("req: %ld adj: %f err: %.5f (ns)\n", usec, nsec_adjusted, kalmanerr.x); |
||
233 | kalman_update(&kalmanerr, err); |
||
234 | } |
||
235 | #endif // HAVE_KALMAN |
||
236 | #else |
||
237 | // Sadly, these systems must use the not so efficient gettimeofday() |
||
238 | // and working units are microseconds, struct is timeval |
||
239 | static void timeval_add_ulong (struct timeval *tv0, unsigned long value) { |
||
240 | tv0->tv_usec += value; |
||
241 | if (tv0->tv_usec >= MILLION) { |
||
242 | tv0->tv_sec++; |
||
243 | tv0->tv_usec -= MILLION; |
||
244 | } |
||
245 | } |
||
// Return 1 when tv1 is strictly later than tv0, otherwise 0.
// Seconds decide; microseconds only break a tie in the seconds.
static inline
int timeval_greaterthan(struct timeval tv1, struct timeval tv0) {
    if (tv1.tv_sec != tv0.tv_sec)
        return tv1.tv_sec > tv0.tv_sec;
    return tv1.tv_usec > tv0.tv_usec;
}
||
255 | // tv1 assumed greater than tv0 |
||
256 | static double timeval_diff (struct timeval tv1, struct timeval tv0) { |
||
257 | double result; |
||
258 | if (tv1.tv_usec < tv0.tv_usec) { |
||
259 | tv1.tv_usec += MILLION; |
||
260 | tv1.tv_sec--; |
||
261 | } |
||
262 | result = (double) (((tv1.tv_sec - tv0.tv_sec) * MILLION) + (tv1.tv_usec - tv0.tv_usec)); |
||
263 | return result; |
||
264 | } |
||
265 | void delay_busyloop (unsigned long usec) { |
||
266 | struct timeval t1, t2; |
||
267 | gettimeofday( &t1, NULL ); |
||
268 | timeval_add_ulong(&t1, usec); |
||
269 | while (1) { |
||
270 | gettimeofday( &t2, NULL ); |
||
271 | if (timeval_greaterthan(t2, t1)) |
||
272 | break; |
||
273 | } |
||
274 | } |
||
275 | #ifdef HAVE_KALMAN |
||
276 | // Request units is microseconds |
||
277 | // Adjust units is microseconds |
||
278 | void delay_kalman (unsigned long usec) { |
||
279 | struct timeval t1, t2, finishtime; |
||
280 | long usec_adjusted; |
||
281 | double err; |
||
282 | static kalman_state kalmanerr={ |
||
283 | 0.00001, //q process noise covariance |
||
284 | 0.1, //r measurement noise covariance |
||
285 | 0.0, //x value, error predictio (units nanoseconds) |
||
286 | 1, //p estimation error covariance |
||
287 | 0.25 //k kalman gain |
||
288 | }; |
||
289 | // Get the current clock |
||
290 | gettimeofday( &t1, NULL ); |
||
291 | // Perform the kalman adjust per the predicted delay error |
||
292 | if (kalmanerr.x > 0) { |
||
293 | usec_adjusted = usec - (long) floor(kalmanerr.x); |
||
294 | if (usec_adjusted < 0) |
||
295 | usec_adjusted = 0; |
||
296 | } |
||
297 | else |
||
298 | usec_adjusted = usec + (long) floor(kalmanerr.x); |
||
299 | // Set the finishtime |
||
300 | finishtime = t1; |
||
301 | timeval_add_ulong(&finishtime, usec_adjusted); |
||
302 | # ifdef HAVE_NANOSLEEP |
||
303 | // Don't call nanosleep for values less than 10 microseconds |
||
304 | // as the syscall is too expensive. Let the busy loop |
||
305 | // provide the delay for times under that. |
||
306 | if (usec_adjusted > 10) { |
||
307 | struct timespec requested={0,0}, remaining; |
||
308 | timespec_add_ulong(&requested, (usec_adjusted * 1000)); |
||
309 | nanosleep(&requested, &remaining); |
||
310 | } |
||
311 | # endif |
||
312 | while (1) { |
||
313 | gettimeofday(&t2, NULL ); |
||
314 | if (timeval_greaterthan(t2, finishtime)) |
||
315 | break; |
||
316 | } |
||
317 | // Compute the delay error in units of microseconds |
||
318 | // and cast to type double |
||
319 | err = (double)(timeval_diff(t2, t1) - usec); |
||
320 | // printf("req: %ld adj: %ld err: %.5f (us)\n", usec, usec_adjusted, kalmanerr.x); |
||
321 | kalman_update(&kalmanerr, err); |
||
322 | } |
||
323 | #endif // Kalman |
||
324 | #endif |
||
325 | |||
326 |