/* nexmon – Rev 1
 *
 * Subversion Repositories:
 * Rev:
 */
/*--------------------------------------------------------------- 
 * Copyright (c) 1999,2000,2001,2002,2003                              
 * The Board of Trustees of the University of Illinois            
 * All Rights Reserved.                                           
 *--------------------------------------------------------------- 
 * Permission is hereby granted, free of charge, to any person    
 * obtaining a copy of this software (Iperf) and associated       
 * documentation files (the "Software"), to deal in the Software  
 * without restriction, including without limitation the          
 * rights to use, copy, modify, merge, publish, distribute,        
 * sublicense, and/or sell copies of the Software, and to permit     
 * persons to whom the Software is furnished to do
 * so, subject to the following conditions: 
 *
 *     
 * Redistributions of source code must retain the above 
 * copyright notice, this list of conditions and 
 * the following disclaimers. 
 *
 *     
 * Redistributions in binary form must reproduce the above 
 * copyright notice, this list of conditions and the following 
 * disclaimers in the documentation and/or other materials 
 * provided with the distribution. 
 * 
 *     
 * Neither the names of the University of Illinois, NCSA, 
 * nor the names of its contributors may be used to endorse 
 * or promote products derived from this Software without
 * specific prior written permission. 
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 * NONINFRINGEMENT. IN NO EVENT SHALL THE CONTIBUTORS OR COPYRIGHT 
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 * ________________________________________________________________
 * National Laboratory for Applied Network Research 
 * National Center for Supercomputing Applications 
 * University of Illinois at Urbana-Champaign 
 * http://www.ncsa.uiuc.edu
 * ________________________________________________________________ 
 *
 * delay.c
 * by Mark Gates <mgates@nlanr.net>
 * updates 
 * by Robert J. McMahon <rmcmahon@broadcom.com> <rjmcmahon@rjmcmahon.com>
 * -------------------------------------------------------------------
 * attempts at accurate microsecond delays
 * ------------------------------------------------------------------- */
#include "headers.h"
#include "util.h"
#include "delay.h"
#include <math.h>

#define MILLION 1000000
#define BILLION 1000000000

/* -------------------------------------------------------------------
 * A micro-second delay function
 * o Use a busy loop or nanosleep
 * 
 * Some notes:
 * o clock_gettime() (if available) is preferred over gettimeofday() 
 *   as it gives nanosecond resolution and should be more efficient.
 *   It also supports CLOCK_MONOTONIC and CLOCK_MONOTONIC_RAW,
 *   though CLOCK_REALTIME is what this code uses.
 * o This code does not use Timestamp object, as the goal of these 
 *   functions is accurate delays (vs accurate timestamps.)
 * o The syscalls such as nanosleep guarantee at least the request time
 *   and can and will delay longer, particularly due to things like context 
 *   switching, causing the delay to lose accuracy
 * o Kalman filtering is used to predict delay error which in turn
 *   is used to adjust the delay, hopefully mitigating the above.  
 *   Note:  This can cause the delay to return faster than the request,
 *   i.e. the *at least* guarantee is not preserved for the kalman
 *   adjusted delay calls.
 * o Remember, the Client is keeping a running average delay for the 
 *   thread so errors in delay will also be adjusted there. (Assuming 
 *   it's possible.  It's not really possible at top line link rates 
 *   because lost time can't be made up for by speeding up the transmits.  
 *   Hence, don't lose time with delay calls which err on the side of 
 *   taking too long.  Kalman should help much here.)
 * 
 * POSIX nanosleep(). This allows a higher timing resolution 
 * (under Linux e.g. it uses hrtimers), does not affect any signals, 
 * and will use up remaining time when interrupted.
 * ------------------------------------------------------------------- */

// Delay for roughly `usec` microseconds.
// The implementation is picked at compile time, in this order of
// preference: the Kalman-adjusted delay, plain nanosleep, and
// finally the busy loop.
void delay_loop(unsigned long usec)
{
#if defined(HAVE_KALMAN)
    delay_kalman(usec);
#elif defined(HAVE_NANOSLEEP)
    delay_nanosleep(usec);
#else
    delay_busyloop(usec);
#endif
}

#ifdef HAVE_NANOSLEEP
// Suspend the calling thread with the nanosleep syscall.
//
// usec: requested delay in microseconds.
//
// The request is split into whole seconds plus leftover nanoseconds.
// nanosleep() rejects a tv_nsec of one second or more with EINVAL and
// returns without sleeping, so putting the whole request into tv_nsec
// turned any delay of a second or longer into no delay at all.
// Splitting first also keeps the usec -> nsec multiplication from
// overflowing when unsigned long is 32 bits wide.
void delay_nanosleep (unsigned long usec) {
    struct timespec requested, remaining;
    requested.tv_sec  = (time_t) (usec / MILLION);
    requested.tv_nsec = (long) ((usec % MILLION) * 1000L);
    // Note, signals will cause the nanosleep
    // to return early.  That's fine.
    nanosleep(&requested, &remaining);
}
#endif

#if defined (HAVE_NANOSLEEP) || defined (HAVE_CLOCK_GETTIME)
// Advance *tv0 by `value` nanoseconds.  tv_nsec ends up below one
// second provided it was below one second on entry.
//
// Whole seconds are moved into tv_sec before the remainder is added.
// A single carry on its own is not enough: callers pass usec * 1000,
// so a request of a second or more could leave tv_nsec at or above
// one second, which nanosleep() rejects and which breaks the
// seconds-then-nanoseconds comparison used by the busy loops.
static void timespec_add_ulong (struct timespec *tv0, unsigned long value) {
    tv0->tv_sec  += (time_t) (value / BILLION);
    tv0->tv_nsec += (long) (value % BILLION);
    // Both addends are now below one second, so one carry suffices
    if (tv0->tv_nsec >= BILLION) {
        tv0->tv_sec++;
        tv0->tv_nsec -= BILLION;
    }
}
#endif

#ifdef HAVE_KALMAN
// Kalman versions attempt to support delay request
// accuracy over a minimum guaranteed delay by
// predicting the delay error. This is
// the basic recursive algorithm. 
// One step of the recursive filter: fold `measurement` into *state.
//   p <- p + q                 (prediction)
//   k <- p / (p + r)           (gain)
//   x <- x + k * (meas - x)    (estimate moves toward the measurement)
//   p <- (1 - k) * p           (covariance after the measurement)
static void kalman_update (kalman_state *state, double measurement) {
    // Prediction step
    state->p += state->q;
    // Measurement step: gain first, then the estimate and covariance
    state->k = state->p / (state->p + state->r);
    state->x += state->k * (measurement - state->x);
    state->p *= (1 - state->k);
}
#endif

#ifdef HAVE_CLOCK_GETTIME
// Delay calls for systems with clock_gettime
// Working units are nanoseconds and structures are timespec
// Advance *tv0 by `value` nanoseconds given as a double; any
// fractional nanosecond is dropped.
//
// o Zero, negative and NaN values leave *tv0 untouched.  The Kalman
//   adjustment can yield a negative request, and converting a
//   negative double to unsigned long (as was done before) is
//   undefined behavior.
// o Whole seconds go into tv_sec first, so tv_nsec stays below one
//   second for any request length (given it was below one second on
//   entry), not only for requests shorter than a second.
static void timespec_add_double (struct timespec *tv0, double value) {
    time_t sec;
    long nsec;

    if (!(value > 0.0))
        return;
    sec  = (time_t) (value / BILLION);
    nsec = (long) (value - ((double) sec * BILLION));
    // The division above may round up to the next whole second for
    // very large values; never step backwards because of that
    if (nsec < 0)
        nsec = 0;
    tv0->tv_sec  += sec;
    tv0->tv_nsec += nsec;
    if (tv0->tv_nsec >= BILLION) {
        tv0->tv_sec++;
        tv0->tv_nsec -= BILLION;
    }
}
// Return tv1 - tv0 in nanoseconds as a double.
// tv1 is assumed to be the later of the two; if it is not, the
// result is negative.
//
// The subtraction is done in floating point.  The earlier integer
// form multiplied the seconds difference by BILLION in time_t
// arithmetic, which overflows after roughly two seconds when
// time_t is 32 bits wide.
static double timespec_diff (struct timespec tv1, struct timespec tv0) {
    double sec_delta  = (double) tv1.tv_sec  - (double) tv0.tv_sec;
    double nsec_delta = (double) tv1.tv_nsec - (double) tv0.tv_nsec;
    return (sec_delta * BILLION) + nsec_delta;
}
// Add *tv1 onto *tv0 in place; *tv1 is only read.
// At most one second is carried out of the nanosecond field.
static void timespec_add( struct timespec *tv0, struct timespec *tv1)
{
    tv0->tv_sec  += tv1->tv_sec;
    tv0->tv_nsec += tv1->tv_nsec;
    // Nothing to carry: done
    if ( tv0->tv_nsec < BILLION )
        return;
    tv0->tv_sec  += 1;
    tv0->tv_nsec -= BILLION;
}
// Returns 1 when tv1 is strictly later than tv0, otherwise 0.
// Seconds decide first; nanoseconds only break a tie.
static inline 
int timespec_greaterthan(struct timespec tv1, struct timespec tv0) {
    if (tv1.tv_sec != tv0.tv_sec)
        return (tv1.tv_sec > tv0.tv_sec) ? 1 : 0;
    return (tv1.tv_nsec > tv0.tv_nsec) ? 1 : 0;
}
// A cpu busy loop for systems with clock_gettime
//
// usec: requested delay in microseconds.
//
// Spins on CLOCK_REALTIME until the clock reads strictly later than
// (start + usec).  Whole seconds of the request are added to the
// deadline directly and only the sub-second remainder is converted
// to nanoseconds.  That keeps the deadline's tv_nsec below one second
// for multi-second requests (the comparison relies on it) and keeps
// the usec -> nsec multiplication from overflowing a 32-bit
// unsigned long.
void delay_busyloop (unsigned long usec) {
    struct timespec now, deadline;
    clock_gettime(CLOCK_REALTIME, &deadline);
    deadline.tv_sec += (time_t) (usec / MILLION);
    timespec_add_ulong(&deadline, (usec % MILLION) * 1000L);
    do {
        clock_gettime(CLOCK_REALTIME, &now);
    } while (!timespec_greaterthan(now, deadline));
}
// Kalman routines for systems with clock_gettime
#ifdef HAVE_KALMAN 
// Request units is microseconds
// Adjust units is nanoseconds
void delay_kalman (unsigned long usec) {
    struct timespec t1, t2, finishtime, requested={0,0}, remaining;
    double nsec_adjusted, err;
    static kalman_state kalmanerr={
        0.00001, //q process noise covariance
        0.1, //r measurement noise covariance
        0.0, //x value, error predictio (units nanoseconds)
        1, //p estimation error covariance
        0.75 //k kalman gain
    };
    // Get the current clock
    clock_gettime(CLOCK_REALTIME, &t1);
    // Perform the kalman adjust per the predicted delay error
    nsec_adjusted = (usec * 1000.0) - kalmanerr.x;
    // Set a timespec to be used by the nanosleep
    // as well as for the finished time calculation
    timespec_add_double(&requested, nsec_adjusted);
    // Set the finish time in timespec format
    finishtime = t1;
    timespec_add(&finishtime, &requested);
#  ifdef HAVE_NANOSLEEP
    // Don't call nanosleep for values less than 10 microseconds
    // as the syscall is too expensive.  Let the busy loop
    // provide the delay for times under that.
    if (nsec_adjusted > 10000) {
        nanosleep(&requested, &remaining);
    }
#  endif
    while (1) {
        clock_gettime(CLOCK_REALTIME, &t2);
        if (timespec_greaterthan(t2, finishtime)) 
            break;
    }
    // Compute the delay error in units of nanoseconds
    // and cast to type double
    err = (double) (timespec_diff(t2, t1) - (usec * 1000));
    // printf("req: %ld adj: %f err: %.5f (ns)\n", usec, nsec_adjusted, kalmanerr.x);  
    kalman_update(&kalmanerr, err);
}
#endif // HAVE_KALMAN
#else
// Sadly, these systems must use the not so efficient gettimeofday()
// and working units are microseconds, struct is timeval
// Advance *tv0 by `value` microseconds.  tv_usec ends up below one
// second provided it was below one second on entry.
//
// Whole seconds are moved into tv_sec before the remainder is added.
// A single carry on its own is not enough: the busy loop passes the
// full request here, so a delay of a second or more could leave
// tv_usec at or above one second and break the
// seconds-then-microseconds comparison.
static void timeval_add_ulong (struct timeval *tv0, unsigned long value) {
    tv0->tv_sec  += (time_t) (value / MILLION);
    tv0->tv_usec += (long) (value % MILLION);
    // Both addends are now below one second, so one carry suffices
    if (tv0->tv_usec >= MILLION) {
        tv0->tv_sec++;
        tv0->tv_usec -= MILLION;
    }
}
// Returns 1 when tv1 is strictly later than tv0, otherwise 0.
// Seconds decide first; microseconds only break a tie.
static inline 
int timeval_greaterthan(struct timeval tv1, struct timeval tv0) {
    if (tv1.tv_sec != tv0.tv_sec)
        return (tv1.tv_sec > tv0.tv_sec) ? 1 : 0;
    return (tv1.tv_usec > tv0.tv_usec) ? 1 : 0;
}
// Return tv1 - tv0 in microseconds as a double.
// tv1 is assumed to be the later of the two; if it is not, the
// result is negative.
//
// The subtraction is done in floating point.  The earlier integer
// form multiplied the seconds difference by MILLION in time_t
// arithmetic, which overflows once the difference exceeds about
// 2147 seconds when time_t is 32 bits wide.
static double timeval_diff (struct timeval tv1, struct timeval tv0) {
    double sec_delta  = (double) tv1.tv_sec  - (double) tv0.tv_sec;
    double usec_delta = (double) tv1.tv_usec - (double) tv0.tv_usec;
    return (sec_delta * MILLION) + usec_delta;
}
// A cpu busy loop for systems without clock_gettime
//
// usec: requested delay in microseconds.
//
// Spins on gettimeofday() until the clock reads strictly later than
// (start + usec).  Whole seconds of the request are added to the
// deadline directly and only the sub-second remainder goes through
// timeval_add_ulong, so the deadline's tv_usec stays below one
// second for multi-second requests (the comparison relies on it).
void delay_busyloop (unsigned long usec) {
    struct timeval now, deadline;
    gettimeofday( &deadline, NULL );
    deadline.tv_sec += (time_t) (usec / MILLION);
    timeval_add_ulong(&deadline, usec % MILLION);
    do {
        gettimeofday( &now, NULL );
    } while (!timeval_greaterthan(now, deadline));
}
#ifdef HAVE_KALMAN 
// Request units is microseconds
// Adjust units is microseconds
void delay_kalman (unsigned long usec) {
    struct timeval t1, t2, finishtime;
    long usec_adjusted; 
    double err;
    static kalman_state kalmanerr={
        0.00001, //q process noise covariance
        0.1, //r measurement noise covariance
        0.0, //x value, error predictio (units nanoseconds)
        1, //p estimation error covariance
        0.25 //k kalman gain
    };
    // Get the current clock
    gettimeofday( &t1, NULL );
    // Perform the kalman adjust per the predicted delay error
    if (kalmanerr.x > 0) {
        usec_adjusted = usec - (long) floor(kalmanerr.x);
        if (usec_adjusted < 0) 
            usec_adjusted = 0;
    }
    else 
        usec_adjusted = usec + (long) floor(kalmanerr.x);
    // Set the finishtime
    finishtime = t1;
    timeval_add_ulong(&finishtime, usec_adjusted);
#  ifdef HAVE_NANOSLEEP
    // Don't call nanosleep for values less than 10 microseconds
    // as the syscall is too expensive.  Let the busy loop
    // provide the delay for times under that.
    if (usec_adjusted > 10) {
        struct timespec requested={0,0}, remaining;
        timespec_add_ulong(&requested, (usec_adjusted * 1000));
        nanosleep(&requested, &remaining);
    }
#  endif
    while (1) {
        gettimeofday(&t2, NULL );
        if (timeval_greaterthan(t2, finishtime)) 
            break;
    }
    // Compute the delay error in units of microseconds
    // and cast to type double
    err = (double)(timeval_diff(t2, t1)  - usec);
    // printf("req: %ld adj: %ld err: %.5f (us)\n", usec, usec_adjusted, kalmanerr.x);  
    kalman_update(&kalmanerr, err);
}
#endif // Kalman
#endif