clockwerk-opensim – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 vero 1 /*
2 * Copyright (c) Contributors, http://opensimulator.org/
3 * See CONTRIBUTORS.TXT for a full list of copyright holders.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the OpenSimulator Project nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27  
28 using System;
29 using System.Collections.Generic;
30 using System.Linq;
31 using System.Threading;
32 using log4net;
33  
34 namespace OpenSim.Framework.Monitoring
35 {
36 /// <summary>
37 /// Manages launching threads and keeping watch over them for timeouts
38 /// </summary>
39 public static class Watchdog
40 {
41 /// <summary>Timer interval in milliseconds for the watchdog timer</summary>
42 public const double WATCHDOG_INTERVAL_MS = 2500.0d;
43  
44 /// <summary>Default timeout in milliseconds before a thread is considered dead</summary>
45 public const int DEFAULT_WATCHDOG_TIMEOUT_MS = 5000;
46  
/// <summary>
/// Monitoring record that the watchdog keeps for a single tracked thread.
/// </summary>
[System.Diagnostics.DebuggerDisplay("{Thread.Name}")]
public class ThreadWatchdogInfo
{
    /// <summary>
    /// The thread this record describes.
    /// </summary>
    public Thread Thread { get; private set; }

    /// <summary>
    /// Approximate tick when this thread was started.
    /// </summary>
    /// <remarks>
    /// Not terribly good since this quickly wraps around.
    /// </remarks>
    public int FirstTick { get; private set; }

    /// <summary>
    /// Last time this heartbeat update was invoked
    /// </summary>
    public int LastTick { get; set; }

    /// <summary>
    /// Number of milliseconds before we notify that the thread is having a problem.
    /// </summary>
    public int Timeout { get; set; }

    /// <summary>
    /// Is this thread considered timed out?
    /// </summary>
    public bool IsTimedOut { get; set; }

    /// <summary>
    /// Will this thread trigger the alarm function if it has timed out?
    /// </summary>
    public bool AlarmIfTimeout { get; set; }

    /// <summary>
    /// Method execute if alarm goes off. If null then no alarm method is fired.
    /// </summary>
    public Func<string> AlarmMethod { get; set; }

    /// <summary>
    /// Stat structure associated with this thread.
    /// </summary>
    /// <remarks>
    /// Only set by the (thread, timeout, name) constructor. Instances produced by the copy
    /// constructor leave this null.
    /// </remarks>
    public Stat Stat { get; set; }

    /// <summary>
    /// Create the record for a thread and register a pull stat for it with StatsManager.
    /// </summary>
    /// <param name="thread">Thread to monitor.</param>
    /// <param name="timeout">Milliseconds without an update before the thread counts as timed out.</param>
    /// <param name="name">Name used for the stat and its description.</param>
    public ThreadWatchdogInfo(Thread thread, int timeout, string name)
    {
        Thread = thread;
        Timeout = timeout;
        FirstTick = Environment.TickCount & Int32.MaxValue;
        LastTick = FirstTick;

        Stat
            = new Stat(
                name,
                string.Format("Last update of thread {0}", name),
                "",
                "ms",
                "server",
                "thread",
                StatType.Pull,
                MeasuresOfInterest.None,
                // The parentheses are required: binary '-' binds tighter than '&', so without
                // them the expression is TickCount & (Int32.MaxValue - LastTick) rather than
                // the number of ticks since the last update.
                stat => stat.Value = (Environment.TickCount & Int32.MaxValue) - LastTick,
                StatVerbosity.Debug);

        StatsManager.RegisterStat(Stat);
    }

    /// <summary>
    /// Create a snapshot of another record.
    /// </summary>
    /// <remarks>
    /// Copies the thread, tick, timeout and alarm members. The Stat is not copied and no
    /// stat is registered for the snapshot.
    /// </remarks>
    /// <param name="previousTwi">Record to copy from.</param>
    public ThreadWatchdogInfo(ThreadWatchdogInfo previousTwi)
    {
        Thread = previousTwi.Thread;
        FirstTick = previousTwi.FirstTick;
        LastTick = previousTwi.LastTick;
        Timeout = previousTwi.Timeout;
        IsTimedOut = previousTwi.IsTimedOut;
        AlarmIfTimeout = previousTwi.AlarmIfTimeout;
        AlarmMethod = previousTwi.AlarmMethod;
    }

    /// <summary>
    /// Deregister this record's stat from StatsManager, if it has one.
    /// </summary>
    public void Cleanup()
    {
        // Snapshots made through the copy constructor carry no Stat, so there is nothing
        // to deregister for them.
        if (Stat != null)
            StatsManager.DeregisterStat(Stat);
    }
}
129  
130 /// <summary>
131 /// This event is called whenever a tracked thread is
/// stopped or has not called UpdateThread() in time
/// </summary>
134 public static event Action<ThreadWatchdogInfo> OnWatchdogTimeout;
135  
/// <summary>
/// Gets or sets whether the watchdog timer is running.
/// </summary>
/// <remarks>
/// Assigning the value the property already holds does nothing.
/// </remarks>
public static bool Enabled
{
    get
    {
        return m_enabled;
    }

    set
    {
        if (m_enabled != value)
        {
            m_enabled = value;

            // Refresh the last-run tick when switching on, so the first timer pass does not
            // report a long gap since the previous run.
            if (m_enabled)
                LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

            m_watchdogTimer.Enabled = m_enabled;
        }
    }
}
160 private static bool m_enabled;
161  
private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);

/// <summary>
/// Tracked threads keyed by managed thread ID.
/// </summary>
/// <remarks>
/// The dictionary instance is also the object locked on when the collection is read or
/// changed, so the field is readonly to guarantee that every lock uses the same object.
/// </remarks>
private static readonly Dictionary<int, ThreadWatchdogInfo> m_threads;

/// <summary>
/// Timer that drives WatchdogTimerElapsed. AutoReset is off, so it fires once per Start().
/// </summary>
private static readonly System.Timers.Timer m_watchdogTimer;

/// <summary>
/// Last time the watchdog thread ran.
/// </summary>
/// <remarks>
/// Should run every WATCHDOG_INTERVAL_MS
/// </remarks>
public static int LastWatchdogThreadTick { get; private set; }

/// <summary>
/// Create the thread table and the (initially stopped) one-shot watchdog timer.
/// </summary>
static Watchdog()
{
    m_threads = new Dictionary<int, ThreadWatchdogInfo>();
    m_watchdogTimer = new System.Timers.Timer(WATCHDOG_INTERVAL_MS);
    m_watchdogTimer.AutoReset = false;
    m_watchdogTimer.Elapsed += WatchdogTimerElapsed;
}
181  
/// <summary>
/// Start a new watchdog-tracked thread with no alarm method and the default timeout
/// (DEFAULT_WATCHDOG_TIMEOUT_MS).
/// </summary>
/// <param name="start">The method that will be executed in a new thread</param>
/// <param name="name">A name to give to the new thread</param>
/// <param name="priority">Priority to run the thread at</param>
/// <param name="isBackground">True to run this thread as a background thread, otherwise false</param>
/// <param name="alarmIfTimeout">Trigger an alarm function if we have timed out</param>
/// <param name="log">If true then creation of thread is logged.</param>
/// <returns>The newly created Thread object</returns>
public static Thread StartThread(
    ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout, bool log = true)
{
    // This overload never supplies an alarm method.
    Func<string> noAlarmMethod = null;

    return StartThread(
        start,
        name,
        priority,
        isBackground,
        alarmIfTimeout,
        noAlarmMethod,
        DEFAULT_WATCHDOG_TIMEOUT_MS,
        log);
}
197  
/// <summary>
/// Start a new thread that is tracked by the watchdog
/// </summary>
/// <param name="start">The method that will be executed in a new thread</param>
/// <param name="name">A name to give to the new thread</param>
/// <param name="priority">Priority to run the thread at</param>
/// <param name="isBackground">True to run this thread as a background
/// thread, otherwise false</param>
/// <param name="alarmIfTimeout">Trigger an alarm function if we have timed out</param>
/// <param name="alarmMethod">
/// Alarm method to call if alarmIfTimeout is true and there is a timeout.
/// Normally, this will just return some useful debugging information.
/// </param>
/// <param name="timeout">Number of milliseconds to wait until we issue a warning about timeout.</param>
/// <param name="log">If true then creation of thread is logged.</param>
/// <returns>The newly created Thread object</returns>
public static Thread StartThread(
    ThreadStart start, string name, ThreadPriority priority, bool isBackground,
    bool alarmIfTimeout, Func<string> alarmMethod, int timeout, bool log = true)
{
    Thread thread = new Thread(start);

    // Name the thread before it is logged and started. Assigning the name after Start()
    // made the "Started tracking" message print an empty name and let the new thread run
    // for a while without one.
    thread.Name = name;
    thread.Priority = priority;
    thread.IsBackground = isBackground;

    ThreadWatchdogInfo twi
        = new ThreadWatchdogInfo(thread, timeout, name)
            { AlarmIfTimeout = alarmIfTimeout, AlarmMethod = alarmMethod };

    if (log)
        m_log.DebugFormat(
            "[WATCHDOG]: Started tracking thread {0}, ID {1}", twi.Thread.Name, twi.Thread.ManagedThreadId);

    // Register before starting so the thread is already tracked when its code begins to run.
    lock (m_threads)
        m_threads.Add(twi.Thread.ManagedThreadId, twi);

    thread.Start();

    return thread;
}
239  
/// <summary>
/// Run the callback in a new thread immediately. If the thread exits with an exception log it but do
/// not propagate it.
/// </summary>
/// <remarks>
/// When Util.FireAndForgetMethod is RegressionTest the callback is instead invoked directly
/// on the calling thread and no thread is created.
/// </remarks>
/// <param name="callback">Code for the thread to execute.</param>
/// <param name="name">Name of the thread</param>
/// <param name="obj">Object to pass to the thread.</param>
/// <param name="log">If true then creation of the thread is logged.</param>
public static void RunInThread(WaitCallback callback, string name, object obj, bool log = false)
{
    if (Util.FireAndForgetMethod == FireAndForgetMethod.RegressionTest)
    {
        Culture.SetCurrentCulture();
        callback(obj);
        return;
    }

    ThreadStart ts = new ThreadStart(delegate()
    {
        try
        {
            Culture.SetCurrentCulture();
            callback(obj);
        }
        catch (Exception e)
        {
            m_log.Error(string.Format("[WATCHDOG]: Exception in thread {0}.", name), e);
        }
        finally
        {
            // Stop tracking whether or not the callback threw; otherwise a failed callback
            // leaves the finished thread (and its stat) registered with the watchdog.
            try
            {
                Watchdog.RemoveThread(log:false);
            }
            catch (Exception e)
            {
                // Keep the original guarantee that nothing escapes from this thread.
                m_log.Error(string.Format("[WATCHDOG]: Exception in thread {0}.", name), e);
            }
        }
    });

    StartThread(ts, name, ThreadPriority.Normal, true, false, log:log);
}
272  
/// <summary>
/// Record a heartbeat for the calling thread.
/// </summary>
public static void UpdateThread()
{
    int callerId = Thread.CurrentThread.ManagedThreadId;

    UpdateThread(callerId);
}
280  
/// <summary>
/// Stops watchdog tracking on the current thread
/// </summary>
/// <param name="log">If true then normal events in thread removal are logged.</param>
/// <returns>
/// True if the thread was removed from the list of tracked
/// threads, otherwise false
/// </returns>
public static bool RemoveThread(bool log = true)
{
    int callerId = Thread.CurrentThread.ManagedThreadId;

    return RemoveThread(callerId, log);
}
293  
/// <summary>
/// Stop tracking the thread with the given managed thread ID and deregister its stat.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to stop tracking.</param>
/// <param name="log">If true then a successful removal is logged at debug level.</param>
/// <returns>True if the thread was being tracked and has been removed, otherwise false.</returns>
private static bool RemoveThread(int threadID, bool log = true)
{
    lock (m_threads)
    {
        ThreadWatchdogInfo tracked;
        bool isTracked = m_threads.TryGetValue(threadID, out tracked);

        // An unknown ID is always reported, regardless of the log flag.
        if (!isTracked)
        {
            m_log.WarnFormat(
                "[WATCHDOG]: Requested to remove thread with ID {0} but this is not being monitored", threadID);

            return false;
        }

        if (log)
            m_log.DebugFormat(
                "[WATCHDOG]: Removing thread {0}, ID {1}", tracked.Thread.Name, tracked.Thread.ManagedThreadId);

        tracked.Cleanup();
        m_threads.Remove(threadID);

        return true;
    }
}
319  
/// <summary>
/// Abort a tracked thread and stop tracking it.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to abort.</param>
/// <returns>True if the thread was being tracked, otherwise false.</returns>
public static bool AbortThread(int threadID)
{
    lock (m_threads)
    {
        ThreadWatchdogInfo target;

        if (!m_threads.TryGetValue(threadID, out target))
            return false;

        target.Thread.Abort();
        RemoveThread(threadID);

        return true;
    }
}
338  
/// <summary>
/// Record a heartbeat for the thread with the given managed thread ID and clear its
/// timed-out flag.
/// </summary>
/// <param name="threadID">Managed thread ID of the thread to mark as alive.</param>
private static void UpdateThread(int threadID)
{
    // The lookup is deliberately done without taking the m_threads lock, for speed:
    // threads are added and removed far less often than they are updated, and the odd
    // failed update does no harm. Anything thrown in here is therefore ignored.
    try
    {
        ThreadWatchdogInfo tracked;

        if (!m_threads.TryGetValue(threadID, out tracked))
        {
            m_log.WarnFormat("[WATCHDOG]: Asked to update thread {0} which is not being monitored", threadID);
            return;
        }

        tracked.LastTick = Environment.TickCount & Int32.MaxValue;
        tracked.IsTimedOut = false;
    }
    catch { }
}
361  
/// <summary>
/// Take a snapshot of the records of all currently watched threads, for diagnostics.
/// </summary>
/// <returns>A new array holding the record of every tracked thread.</returns>
public static ThreadWatchdogInfo[] GetThreadsInfo()
{
    ThreadWatchdogInfo[] snapshot;

    lock (m_threads)
    {
        snapshot = m_threads.Values.ToArray();
    }

    return snapshot;
}
371  
/// <summary>
/// Look up the watchdog record of the calling thread.
/// </summary>
/// <returns>The watchdog info. null if the thread isn't being monitored.</returns>
public static ThreadWatchdogInfo GetCurrentThreadInfo()
{
    int callerId = Thread.CurrentThread.ManagedThreadId;

    lock (m_threads)
    {
        ThreadWatchdogInfo info;

        return m_threads.TryGetValue(callerId, out info) ? info : null;
    }
}
386  
/// <summary>
/// Check watched threads. Fire alarm if appropriate.
/// </summary>
/// <remarks>
/// Also runs the memory watchdog (when enabled), the registered checks and stat recording,
/// then re-arms the one-shot watchdog timer.
/// </remarks>
/// <param name="sender"></param>
/// <param name="e"></param>
private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e)
{
    try
    {
        int now = Environment.TickCount & Int32.MaxValue;
        int msElapsed = now - LastWatchdogThreadTick;

        if (msElapsed > WATCHDOG_INTERVAL_MS * 2)
            m_log.WarnFormat(
                "[WATCHDOG]: {0} ms since Watchdog last ran. Interval should be approximately {1} ms",
                msElapsed, WATCHDOG_INTERVAL_MS);

        LastWatchdogThreadTick = Environment.TickCount & Int32.MaxValue;

        // Take a local copy so a handler unsubscribing concurrently cannot null the event
        // between the check and the calls below.
        Action<ThreadWatchdogInfo> callback = OnWatchdogTimeout;

        if (callback != null)
        {
            List<ThreadWatchdogInfo> callbackInfos = null;
            List<ThreadWatchdogInfo> stoppedThreads = null;

            lock (m_threads)
            {
                foreach (ThreadWatchdogInfo threadInfo in m_threads.Values)
                {
                    if (threadInfo.Thread.ThreadState == ThreadState.Stopped)
                    {
                        // Only note the thread here. Removing it from m_threads while the
                        // dictionary is being enumerated invalidates the enumerator.
                        if (stoppedThreads == null)
                            stoppedThreads = new List<ThreadWatchdogInfo>();

                        stoppedThreads.Add(threadInfo);

                        if (callbackInfos == null)
                            callbackInfos = new List<ThreadWatchdogInfo>();

                        callbackInfos.Add(threadInfo);
                    }
                    else if (!threadInfo.IsTimedOut && now - threadInfo.LastTick >= threadInfo.Timeout)
                    {
                        threadInfo.IsTimedOut = true;

                        if (threadInfo.AlarmIfTimeout)
                        {
                            if (callbackInfos == null)
                                callbackInfos = new List<ThreadWatchdogInfo>();

                            // Send a copy of the watchdog info to prevent race conditions where the watchdog
                            // thread updates the monitoring info after an alarm has been sent out.
                            callbackInfos.Add(new ThreadWatchdogInfo(threadInfo));
                        }
                    }
                }

                // Enumeration is finished, so the stopped threads can now be removed safely.
                if (stoppedThreads != null)
                    foreach (ThreadWatchdogInfo stoppedThread in stoppedThreads)
                        RemoveThread(stoppedThread.Thread.ManagedThreadId);
            }

            // Handlers are invoked outside the lock.
            if (callbackInfos != null)
                foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
                    callback(callbackInfo);
        }

        if (MemoryWatchdog.Enabled)
            MemoryWatchdog.Update();

        ChecksManager.CheckChecks();
        StatsManager.RecordStats();
    }
    finally
    {
        // The timer does not auto-reset. Re-arm it in a finally block so that an exception
        // thrown above cannot leave the watchdog permanently stopped.
        m_watchdogTimer.Start();
    }
}
453 }
454 }