nexmon – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 /*---------------------------------------------------------------
2 * Copyright (c) 1999,2000,2001,2002,2003
3 * The Board of Trustees of the University of Illinois
4 * All Rights Reserved.
5 *---------------------------------------------------------------
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software (Iperf) and associated
8 * documentation files (the "Software"), to deal in the Software
9 * without restriction, including without limitation the
10 * rights to use, copy, modify, merge, publish, distribute,
11 * sublicense, and/or sell copies of the Software, and to permit
12 * persons to whom the Software is furnished to do
13 * so, subject to the following conditions:
14 *
15 *
16 * Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and
18 * the following disclaimers.
19 *
20 *
21 * Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimers in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 *
27 * Neither the names of the University of Illinois, NCSA,
28 * nor the names of its contributors may be used to endorse
29 * or promote products derived from this Software without
30 * specific prior written permission.
31 *
32 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
33 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
34 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
35 * NONINFRINGEMENT. IN NO EVENT SHALL THE CONTIBUTORS OR COPYRIGHT
36 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
37 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE
39 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 * ________________________________________________________________
41 * National Laboratory for Applied Network Research
42 * National Center for Supercomputing Applications
43 * University of Illinois at Urbana-Champaign
44 * http://www.ncsa.uiuc.edu
45 * ________________________________________________________________
46 *
47 * delay.c
48 * by Mark Gates <mgates@nlanr.net>
49 * updates
50 * by Robert J. McMahon <rmcmahon@broadcom.com> <rjmcmahon@rjmcmahon.com>
51 * -------------------------------------------------------------------
52 * attempts at accurate microsecond delays
53 * ------------------------------------------------------------------- */
54 #include "headers.h"
55 #include "util.h"
56 #include "delay.h"
57 #include <math.h>
58  
59 #define MILLION 1000000
60 #define BILLION 1000000000
61  
62 /* -------------------------------------------------------------------
63 * A micro-second delay function
64 * o Use a busy loop or nanosleep
65 *
66 * Some notes:
67 * o clock_gettime() (if available) is preferred over gettimeofday()
68 * as it gives nanosecond resolution and should be more efficient.
69 * It also supports CLOCK_MONOTONIC and CLOCK_MONOTONIC_RAW
70 * though CLOCK_REALTIME is being used by the code.
71 * o This code does not use Timestamp object, as the goal of these
72 * functions is accurate delays (vs accurate timestamps.)
73 * o The syscalls such as nanosleep guarantee at least the request time
74 * and can and will delay longer, particularly due to things like context
75 * switching, causing the delay to lose accuracy
76 * o Kalman filtering is used to predict delay error which in turn
77 * is used to adjust the delay, hopefully mitigating the above.
78 * Note: This can cause the delay to return faster than the request,
79 * i.e. the *at least* guarantee is not preserved for the kalman
80 * adjusted delay calls.
81 * o Remember, the Client is keeping a running average delay for the
82 * thread so errors in delay will also be adjusted there. (Assuming
83 * it's possible. It's not really possible at top line link rates
84 * because lost time can't be made up for by speeding up the transmits.
85 * Hence, don't lose time with delay calls which err on the side of
86 * taking too long. Kalman should help much here.)
87 *
88 * POSIX nanosleep(). This allows a higher timing resolution
89 * (under Linux e.g. it uses hrtimers), does not affect any signals,
90 * and will use up remaining time when interrupted.
91 * ------------------------------------------------------------------- */
92  
// Delay the caller for roughly `usec` microseconds using the best
// mechanism selected at build time: the Kalman-corrected delay when
// HAVE_KALMAN is defined, otherwise nanosleep() when HAVE_NANOSLEEP is
// defined, otherwise the cpu busy loop.
void delay_loop(unsigned long usec)
{
#if defined(HAVE_KALMAN)
    delay_kalman(usec);
#elif defined(HAVE_NANOSLEEP)
    delay_nanosleep(usec);
#else
    delay_busyloop(usec);
#endif
}
105  
106 #ifdef HAVE_NANOSLEEP
// Suspend the calling thread for about `usec` microseconds using the
// nanosleep() syscall.
//
// The request is split into whole seconds and leftover nanoseconds:
// nanosleep() rejects a tv_nsec outside [0, 999999999] with EINVAL, so
// putting the full request into tv_nsec would turn every delay of one
// second or more into no delay at all.
void delay_nanosleep (unsigned long usec) {
    const unsigned long usec_per_sec = 1000000UL;
    struct timespec requested;

    requested.tv_sec = (time_t) (usec / usec_per_sec);
    requested.tv_nsec = (long) (usec % usec_per_sec) * 1000L;
    // Note, signals will cause the nanosleep
    // to return early. That's fine, so the remaining
    // time is not requested back.
    nanosleep(&requested, NULL);
}
116 #endif
117  
118 #if defined (HAVE_NANOSLEEP) || defined (HAVE_CLOCK_GETTIME)
// Advance *tv0 by `value` nanoseconds.
//
// tv0->tv_nsec is expected to be normalized (0..999999999) on entry and
// is normalized again on return. Whole seconds contained in `value` are
// moved into tv_sec first, so increments of one second or more no
// longer leave tv_nsec out of range (a single carry is then enough).
static void timespec_add_ulong (struct timespec *tv0, unsigned long value) {
    const unsigned long nsec_per_sec = 1000000000UL;

    tv0->tv_sec += (time_t) (value / nsec_per_sec);
    tv0->tv_nsec += (long) (value % nsec_per_sec);
    if (tv0->tv_nsec >= (long) nsec_per_sec) {
        tv0->tv_sec++;
        tv0->tv_nsec -= (long) nsec_per_sec;
    }
}
126 #endif
127  
128 #ifdef HAVE_KALMAN
129 // Kalman versions attempt to support delay request
130 // accuracy over a minimum guaranteed delay by
131 // prediciting the delay error. This is
132 // the basic recursive algorithm.
133 static void kalman_update (kalman_state *state, double measurement) {
134 //prediction update
135 state->p = state->p + state->q;
136 //measurement update
137 state->k = state->p / (state->p + state->r);
138 state->x = state->x + (state->k * (measurement - state->x));
139 state->p = (1 - state->k) * state->p;
140 }
141 #endif
142  
143 #ifdef HAVE_CLOCK_GETTIME
144 // Delay calls for systems with clock_gettime
145 // Working units are nanoseconds and structures are timespec
// Advance *tv0 by `value` nanoseconds given as a double; the fractional
// part of `value` is truncated.
//
// Values that are zero, negative or NaN leave *tv0 untouched, because
// converting such a double to an unsigned integer type is undefined
// behavior. Whole seconds are carried into tv_sec so that tv_nsec stays
// within 0..999999999 even for increments of several seconds.
// tv0->tv_nsec is expected to be normalized on entry.
static void timespec_add_double (struct timespec *tv0, double value) {
    const unsigned long long nsec_per_sec = 1000000000ULL;
    unsigned long long nsec;

    // Written as a negated comparison so that NaN also returns here.
    if (!(value > 0.0))
        return;
    nsec = (unsigned long long) value;
    tv0->tv_sec += (time_t) (nsec / nsec_per_sec);
    tv0->tv_nsec += (long) (nsec % nsec_per_sec);
    if (tv0->tv_nsec >= (long) nsec_per_sec) {
        tv0->tv_sec++;
        tv0->tv_nsec -= (long) nsec_per_sec;
    }
}
// Return tv1 - tv0 in nanoseconds. tv1 is assumed to be the later time.
//
// The arithmetic is carried out in double so the seconds-to-nanoseconds
// scaling cannot overflow an integer type on platforms where time_t or
// long is 32 bits wide. No borrow step is needed: a negative nanosecond
// difference simply subtracts from the scaled seconds difference.
static double timespec_diff (struct timespec tv1, struct timespec tv0) {
    double sec_part = (double) (tv1.tv_sec - tv0.tv_sec) * 1e9;
    double nsec_part = (double) (tv1.tv_nsec - tv0.tv_nsec);

    return sec_part + nsec_part;
}
// Add the interval *tv1 onto *tv0 in place. At most one second is
// carried out of the nanosecond field, so both inputs are expected to
// hold a tv_nsec below one second.
static void timespec_add( struct timespec *tv0, struct timespec *tv1)
{
    const long nsec_per_sec = 1000000000L;
    long nsec_sum = tv0->tv_nsec + tv1->tv_nsec;
    int carry = (nsec_sum >= nsec_per_sec) ? 1 : 0;

    tv0->tv_sec += tv1->tv_sec + carry;
    tv0->tv_nsec = carry ? (nsec_sum - nsec_per_sec) : nsec_sum;
}
// Return 1 when tv1 is strictly later than tv0, otherwise 0.
// Seconds decide the ordering; nanoseconds only break a tie.
static inline
int timespec_greaterthan(struct timespec tv1, struct timespec tv0) {
    if (tv1.tv_sec != tv0.tv_sec)
        return tv1.tv_sec > tv0.tv_sec;
    return tv1.tv_nsec > tv0.tv_nsec;
}
// A cpu busy loop for systems with clock_gettime.
// Spins on CLOCK_REALTIME until the clock reads strictly later than
// the start time plus `usec` microseconds.
void delay_busyloop (unsigned long usec) {
    const unsigned long usec_per_sec = 1000000UL;
    struct timespec deadline, now;

    clock_gettime(CLOCK_REALTIME, &deadline);
    // Carry whole seconds straight into tv_sec and hand only the
    // sub-second remainder to the helper. This keeps the deadline's
    // tv_nsec below one second for requests of a second or more, which
    // would otherwise make multi-second delays end early, and it keeps
    // the nanosecond product well inside unsigned long.
    deadline.tv_sec += (time_t) (usec / usec_per_sec);
    timespec_add_ulong(&deadline, (usec % usec_per_sec) * 1000UL);
    do {
        clock_gettime(CLOCK_REALTIME, &now);
    } while (!timespec_greaterthan(now, deadline));
}
192 // Kalman routines for systems with clock_gettime
193 #ifdef HAVE_KALMAN
194 // Request units is microseconds
195 // Adjust units is nanoseconds
196 void delay_kalman (unsigned long usec) {
197 struct timespec t1, t2, finishtime, requested={0,0}, remaining;
198 double nsec_adjusted, err;
199 static kalman_state kalmanerr={
200 0.00001, //q process noise covariance
201 0.1, //r measurement noise covariance
202 0.0, //x value, error predictio (units nanoseconds)
203 1, //p estimation error covariance
204 0.75 //k kalman gain
205 };
206 // Get the current clock
207 clock_gettime(CLOCK_REALTIME, &t1);
208 // Perform the kalman adjust per the predicted delay error
209 nsec_adjusted = (usec * 1000.0) - kalmanerr.x;
210 // Set a timespec to be used by the nanosleep
211 // as well as for the finished time calculation
212 timespec_add_double(&requested, nsec_adjusted);
213 // Set the finish time in timespec format
214 finishtime = t1;
215 timespec_add(&finishtime, &requested);
216 # ifdef HAVE_NANOSLEEP
217 // Don't call nanosleep for values less than 10 microseconds
218 // as the syscall is too expensive. Let the busy loop
219 // provide the delay for times under that.
220 if (nsec_adjusted > 10000) {
221 nanosleep(&requested, &remaining);
222 }
223 # endif
224 while (1) {
225 clock_gettime(CLOCK_REALTIME, &t2);
226 if (timespec_greaterthan(t2, finishtime))
227 break;
228 }
229 // Compute the delay error in units of nanoseconds
230 // and cast to type double
231 err = (double) (timespec_diff(t2, t1) - (usec * 1000));
232 // printf("req: %ld adj: %f err: %.5f (ns)\n", usec, nsec_adjusted, kalmanerr.x);
233 kalman_update(&kalmanerr, err);
234 }
235 #endif // HAVE_KALMAN
236 #else
237 // Sadly, these systems must use the not so efficient gettimeofday()
238 // and working units are microseconds, struct is timeval
// Advance *tv0 by `value` microseconds.
//
// tv0->tv_usec is expected to be normalized (0..999999) on entry and is
// normalized again on return. Whole seconds contained in `value` are
// moved into tv_sec first, so increments of one second or more no
// longer leave tv_usec out of range (a single carry is then enough).
static void timeval_add_ulong (struct timeval *tv0, unsigned long value) {
    const unsigned long usec_per_sec = 1000000UL;

    tv0->tv_sec += (long) (value / usec_per_sec);
    tv0->tv_usec += (long) (value % usec_per_sec);
    if (tv0->tv_usec >= (long) usec_per_sec) {
        tv0->tv_sec++;
        tv0->tv_usec -= (long) usec_per_sec;
    }
}
// Return 1 when tv1 is strictly later than tv0, otherwise 0.
// Microseconds are only compared when the seconds are equal.
static inline
int timeval_greaterthan(struct timeval tv1, struct timeval tv0) {
    int later_second = (tv1.tv_sec > tv0.tv_sec);
    int same_second = (tv1.tv_sec == tv0.tv_sec);

    return (later_second || (same_second && (tv1.tv_usec > tv0.tv_usec))) ? 1 : 0;
}
// Return tv1 - tv0 in microseconds. tv1 is assumed to be the later time.
//
// The arithmetic is carried out in double so the seconds-to-microseconds
// scaling cannot overflow an integer type on platforms where time_t or
// long is 32 bits wide. No borrow step is needed: a negative microsecond
// difference simply subtracts from the scaled seconds difference.
static double timeval_diff (struct timeval tv1, struct timeval tv0) {
    double sec_part = (double) (tv1.tv_sec - tv0.tv_sec) * 1e6;
    double usec_part = (double) (tv1.tv_usec - tv0.tv_usec);

    return sec_part + usec_part;
}
// A cpu busy loop for systems without clock_gettime.
// Spins on gettimeofday() until the clock reads strictly later than
// the start time plus `usec` microseconds.
void delay_busyloop (unsigned long usec) {
    const unsigned long usec_per_sec = 1000000UL;
    struct timeval deadline, now;

    gettimeofday( &deadline, NULL );
    // Carry whole seconds straight into tv_sec and hand only the
    // sub-second remainder to the helper, so the deadline's tv_usec
    // stays below one second for requests of a second or more.
    // Otherwise multi-second delays would end early.
    deadline.tv_sec += (long) (usec / usec_per_sec);
    timeval_add_ulong(&deadline, usec % usec_per_sec);
    do {
        gettimeofday( &now, NULL );
    } while (!timeval_greaterthan(now, deadline));
}
275 #ifdef HAVE_KALMAN
276 // Request units is microseconds
277 // Adjust units is microseconds
278 void delay_kalman (unsigned long usec) {
279 struct timeval t1, t2, finishtime;
280 long usec_adjusted;
281 double err;
282 static kalman_state kalmanerr={
283 0.00001, //q process noise covariance
284 0.1, //r measurement noise covariance
285 0.0, //x value, error predictio (units nanoseconds)
286 1, //p estimation error covariance
287 0.25 //k kalman gain
288 };
289 // Get the current clock
290 gettimeofday( &t1, NULL );
291 // Perform the kalman adjust per the predicted delay error
292 if (kalmanerr.x > 0) {
293 usec_adjusted = usec - (long) floor(kalmanerr.x);
294 if (usec_adjusted < 0)
295 usec_adjusted = 0;
296 }
297 else
298 usec_adjusted = usec + (long) floor(kalmanerr.x);
299 // Set the finishtime
300 finishtime = t1;
301 timeval_add_ulong(&finishtime, usec_adjusted);
302 # ifdef HAVE_NANOSLEEP
303 // Don't call nanosleep for values less than 10 microseconds
304 // as the syscall is too expensive. Let the busy loop
305 // provide the delay for times under that.
306 if (usec_adjusted > 10) {
307 struct timespec requested={0,0}, remaining;
308 timespec_add_ulong(&requested, (usec_adjusted * 1000));
309 nanosleep(&requested, &remaining);
310 }
311 # endif
312 while (1) {
313 gettimeofday(&t2, NULL );
314 if (timeval_greaterthan(t2, finishtime))
315 break;
316 }
317 // Compute the delay error in units of microseconds
318 // and cast to type double
319 err = (double)(timeval_diff(t2, t1) - usec);
320 // printf("req: %ld adj: %ld err: %.5f (us)\n", usec, usec_adjusted, kalmanerr.x);
321 kalman_update(&kalmanerr, err);
322 }
323 #endif // Kalman
324 #endif
325  
326