summaryrefslogtreecommitdiff
path: root/src/lib/eina/eina_debug_monitor.c
blob: 324759f2a2a486104e904be4c53c97ceda302e56 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
#include "eina_debug.h"

#ifdef EINA_HAVE_DEBUG

#include <errno.h>
#include <stddef.h>
#include <unistd.h>

#define DEBUG_SERVER ".ecore/efl_debug/0"

// thread bookkeeping defined elsewhere: the mainloop thread id plus an
// array (and its element count) of the other threads that get sampled
extern pthread_t            _eina_debug_thread_mainloop;
extern volatile pthread_t  *_eina_debug_thread_active;
extern volatile int         _eina_debug_thread_active_num;

// fd of the connection to the debug daemon; -1 while not connected
int                    _eina_debug_monitor_service_fd = -1;
// released once by the signal handler in each sampled thread; the monitor
// thread locks it once per thread it signalled to wait for all of them
Eina_Semaphore         _eina_debug_monitor_return_sem;

// set to EINA_TRUE once the monitor thread has been created
static Eina_Bool       _monitor_thread_runs = EINA_FALSE;
static pthread_t       _monitor_thread;

// per-thread sample storage, re-pushed by the monitor on every poll cycle.
// slot 0 is always for mainloop, 1 + is for extra threads
static void             ***_bt_buf;     // backtrace address array per slot
static int                *_bt_buf_len; // entries stored in each backtrace
static struct timespec    *_bt_ts;      // per-thread cpu clock reading
static int                *_bt_cpu;     // sched_getcpu() result per slot

// libunwind based backtracer: stores the instruction pointer of each stack
// frame it walks into bt[] and returns how many entries were written (never
// more than max). the cursor is stepped before the first read, so the frame
// the cursor starts on is not itself recorded
static inline int
_eina_debug_unwind_bt(void **bt, int max)
{
   unw_context_t context;
   unw_cursor_t frame;
   unw_word_t ip;
   int count = 0;

   // snapshot the machine state right here and aim a cursor at it
   unw_getcontext(&context);
   unw_init_local(&frame, &context);
   // move one frame at a time, recording each instruction pointer, until
   // the stack runs out or the caller's buffer is full
   while ((unw_step(&frame) > 0) && (count < max))
     {
        unw_get_reg(&frame, UNW_REG_IP, &ip);
        bt[count] = (void *)ip;
        count++;
     }
   // number of frames we managed to store
   return count;
}

// this signal handler is called inside each and every thread when the
// thread gets a signal via pthread_kill(). this causes the thread to
// stop here inside this handler and "do something" then when this returns
// resume whatever it was doing like any signal handler.
//
// what it does: find the slot belonging to the current thread (0 for the
// mainloop, index + 1 for entries of _eina_debug_thread_active), store the
// current cpu, the thread cpu-time clock and a backtrace into that slot of
// the _bt_* arrays and then release _eina_debug_monitor_return_sem exactly
// once so the monitor thread knows this thread has finished.
//
// as this interrupts arbitrary code, errno is saved on entry and restored
// on every exit path so the interrupted code never sees a value clobbered
// by the calls made in here, and the error path uses write() rather than
// stdio because stdio is not async-signal-safe
static void
_eina_debug_signal(int sig EINA_UNUSED,
                   siginfo_t *si EINA_UNUSED,
                   void *foo EINA_UNUSED)
{
   static const char no_slot_msg[] =
     "EINA DEBUG ERROR: can't find thread slot!\n";
   int saved_errno = errno;
   int i, slot = 0;
   pthread_t self = pthread_self();
   clockid_t cid;

   // find which slot in the array of threads we have so we store info
   // in the correct slot for us
   if (self != _eina_debug_thread_mainloop)
     {
        for (i = 0; i < _eina_debug_thread_active_num; i++)
          {
             if (self == _eina_debug_thread_active[i])
               {
                  slot = i + 1;
                  goto found;
               }
          }
        // we couldn't find our thread reference! help! report it with
        // write() - fprintf() could deadlock on the stream lock if the
        // interrupted code was in the middle of a stdio call
        if (write(STDERR_FILENO, no_slot_msg, sizeof(no_slot_msg) - 1) < 0)
          {
             // nothing sensible can be done about a failed write in here
          }
        // still wake the monitor - it waits for one release per thread
        eina_semaphore_release(&_eina_debug_monitor_return_sem, 1);
        errno = saved_errno;
        return;
     }
found:
   // store thread info like what cpu core we are on now (not reliable
   // but hey - better than nothing), the amount of cpu time total
   // we have consumed (it's cumulative so subtracting deltas can give
   // you an average amount of cpu time consumed between now and the
   // previous time we looked) and also a full backtrace
   _bt_cpu[slot] = sched_getcpu();
   if (pthread_getcpuclockid(self, &cid) == 0)
     clock_gettime(cid, &(_bt_ts[slot]));
   else
     {
        // no usable clock id - don't read the clock with a garbage id,
        // store a zero time instead
        _bt_ts[slot].tv_sec = 0;
        _bt_ts[slot].tv_nsec = 0;
     }
   _bt_buf_len[slot] = _eina_debug_unwind_bt(_bt_buf[slot], EINA_MAX_BT);
   // now wake up the monitor to let them know we are done collecting our
   // backtrace info
   eina_semaphore_release(&_eina_debug_monitor_return_sem, 1);
   errno = saved_errno;
}

// we shall use SIGPROF as our signal for pausing threads and having them
// dump a backtrace for polling based profiling
#define SIG SIGPROF

// cheap local wall-clock sample: the gettimeofday() result as seconds in a
// double, with the microseconds as the fractional part - not portable
static inline double
get_time(void)
{
   struct timeval now;
   double seconds, fraction;

   gettimeofday(&now, NULL);
   seconds = (double)now.tv_sec;
   fraction = ((double)now.tv_usec) / 1000000.0;
   return seconds + fraction;
}

static void
_eina_debug_collect_bt(pthread_t pth)
{
   // this async signals the thread to switch to the deebug signal handler
   // and collect a backtrace and other info from inside the thread
   pthread_kill(pth, SIG);
}

// this is a DEDICATED debug thread to monitor the application so it works
// even if the mainloop is blocked or the app otherwise deadlocked in some
// way. this is an alternative to using external debuggers so we can get
// users or developers to get useful information about an app at all times.
//
// it loops forever on select() over the debug daemon fd. the commands it
// understands are "PLON" (turn polling on, optional 4 byte unsigned poll
// interval as payload, used as microseconds for the select timeout) and
// "PLOF" (turn polling off). while polling is on, every time select()
// returns it signals the mainloop and all known threads to collect a
// backtrace and waits for all of them to report back. a read failure on
// the daemon fd closes the fd and ends the thread. the thread argument is
// unused and the return value is always NULL
static void *
_eina_debug_monitor(void *data EINA_UNUSED)
{
   int bts = 0, ret, max_fd;
   double t0, t;
   fd_set rfds, wfds, exfds;
   struct timeval tv = { 0 };
   // some state for debugging
   unsigned int poll_time = 1000;
   Eina_Bool poll_on = EINA_FALSE;

   t0 = get_time();
   // sit forever processing commands or timeouts in the debug monitor
   // thread - this is separate to the rest of the app so it shouldn't
   // impact the application specifically
   for (;;)
     {
        int i;

        // set up data for select like read fd's
        FD_ZERO(&rfds);
        FD_ZERO(&wfds);
        FD_ZERO(&exfds);
        // the only fd we care about - our debug daemon connection
        FD_SET(_eina_debug_monitor_service_fd, &rfds);
        max_fd = _eina_debug_monitor_service_fd;
        // if we are in a polling mode then set up a timeout and wait for it
        if (poll_on)
          {
             // only re-arm once the previous timeout has been used up
             if ((tv.tv_sec == 0) && (tv.tv_usec == 0))
               {
                  // poll_time comes off the wire and may be a second or
                  // more - split it so tv_usec stays in its valid range,
                  // as select() rejects an out of range timeout
                  tv.tv_sec = poll_time / 1000000;
                  tv.tv_usec = poll_time % 1000000;
               }
             ret = select(max_fd + 1, &rfds, &wfds, &exfds, &tv);
          }
        // we have no timeout - so wait forever for a message from debugd
        else ret = select(max_fd + 1, &rfds, &wfds, &exfds, NULL);
        // if the fd for debug daemon says it's alive, process it
        if ((ret == 1) && (FD_ISSET(_eina_debug_monitor_service_fd, &rfds)))
          {
             // collect a single op on the debug daemon control fd. op is
             // zeroed up front so it is a terminated string for strcmp()
             // below (assuming the reader fills in the 4 opcode bytes
             // only - TODO confirm against the reader)
             char op[5] = { 0 };
             int size;
             unsigned char *payload;

             // get the opcode and store in op
             payload = NULL;
             size = _eina_debug_monitor_service_read(op, &payload);
             // if not negative - we have a real message
             if (size >= 0)
               {
                  // profiling on with poll time gap as uint payload
                  if (!strcmp(op, "PLON"))
                    {
                       if ((size >= 4) && (payload))
                         memcpy(&poll_time, payload, 4);
                       poll_on = EINA_TRUE;
                    }
                  // profiling off with no payload
                  else if (!strcmp(op, "PLOF"))
                    {
                       poll_time = 1000;
                       poll_on = EINA_FALSE;
                    }
                  // something we don't understand
                  else fprintf(stderr,
                               "EINA DEBUG ERROR: "
                               "Uunknown command %s\n", op);
                  free(payload);
               }
             // major failure on debug daemon control fd - get out of here
             else goto fail;
          }

        if (poll_on)
          {
             // take a lock on grabbing thread debug info like backtraces
             eina_spinlock_take(&_eina_debug_thread_lock);
             // reset our "stack" of memory we use to dump thread info into
             _eina_debug_chunk_tmp_reset();
             // get an array of pointers for the backtrace array for main + th
             _bt_buf = _eina_debug_chunk_tmp_push
               ((1 + _eina_debug_thread_active_num) * sizeof(void *));
             if (!_bt_buf) goto err;
             // get an array of timespecs for mainloop + threads
             _bt_ts = _eina_debug_chunk_tmp_push
               ((1 + _eina_debug_thread_active_num) * sizeof(struct timespec));
             if (!_bt_ts) goto err;
             // get an array of ints for the cpu ids for mainloop + threads
             _bt_cpu = _eina_debug_chunk_tmp_push
               ((1 + _eina_debug_thread_active_num) * sizeof(int));
             if (!_bt_cpu) goto err;
             // now get an array of void ptrs for mainloop bt
             _bt_buf[0] = _eina_debug_chunk_tmp_push(EINA_MAX_BT * sizeof(void *));
             if (!_bt_buf[0]) goto err;
             // get an array of void ptrs for each thread we know about for bt
             for (i = 0; i < _eina_debug_thread_active_num; i++)
               {
                  _bt_buf[i + 1] = _eina_debug_chunk_tmp_push(EINA_MAX_BT * sizeof(void *));
                  if (!_bt_buf[i + 1]) goto err;
               }
             // get an array of ints to store the bt len for mainloop + threads
             _bt_buf_len = _eina_debug_chunk_tmp_push
               ((1 + _eina_debug_thread_active_num) * sizeof(int));
             // the signal handlers write into this array - never signal
             // anyone if we failed to get it
             if (!_bt_buf_len) goto err;
             // collect bt from the mainloop - always there
             _eina_debug_collect_bt(_eina_debug_thread_mainloop);
             // now collect per thread
             for (i = 0; i < _eina_debug_thread_active_num; i++)
               _eina_debug_collect_bt(_eina_debug_thread_active[i]);
             // we're done probing. now collect all the "i'm done" msgs on the
             // semaphore for every thread + mainloop
             for (i = 0; i < (_eina_debug_thread_active_num + 1); i++)
               eina_semaphore_lock(&_eina_debug_monitor_return_sem);
             // we now have gotten all the data from all threads + mainloop.
             // we can process it now as we see fit, so release thread lock
//// XXX: some debug so we can see the bt's we collect - will go
//             for (i = 0; i < (_eina_debug_thread_active_num + 1); i++)
//               {
//                  _eina_debug_dump_fhandle_bt(stderr, _bt_buf[i], _bt_buf_len[i]);
//               }
err:
             eina_spinlock_release(&_eina_debug_thread_lock);
//// XXX: some debug just to see how well we perform - will go
             bts++;
             if (bts >= 10000)
               {
                  t = get_time();
                  fprintf(stderr, "%1.5f bt's per sec\n", (double)bts / (t - t0));
                  t0 = t;
                  bts = 0;
               }
          }
     }
fail:
   // we failed - get out of here and disconnect from debugd
   close(_eina_debug_monitor_service_fd);
   _eina_debug_monitor_service_fd = -1;
   return NULL;
}

// make sure the debug monitor thread exists: a no-op when it was already
// started, otherwise create it. failing to create the thread is fatal and
// aborts the process
void
_eina_debug_monitor_thread_start(void)
{
   // monitor already up? then there is nothing left to do
   if (_monitor_thread_runs) return;

   // spawn the monitor with default attributes and no argument
   if (pthread_create(&_monitor_thread, NULL, _eina_debug_monitor, NULL) != 0)
     {
        fprintf(stderr, "EINA DEBUG ERROR: Can't create debug thread!\n");
        abort();
     }
   // remember it is running so later calls return right away
   _monitor_thread_runs = EINA_TRUE;
}

void
_eina_debug_monitor_signal_init(void)
{
   struct sigaction sa;

   // set up signal handler for our profiling signal - eevery thread should
   // obey this (this is the case on linux - other OSs may vary)
   sa.sa_sigaction = _eina_debug_signal;
   sa.sa_flags = SA_RESTART | SA_SIGINFO;
   sigemptyset(&sa.sa_mask);
   if (sigaction(SIG, &sa, NULL) != 0)
     fprintf(stderr, "EINA DEBUG ERROR: Can't set up sig %i handler!\n", SIG);
}

// pick the base directory for the debug daemon socket: the value of the
// first environment variable that is set out of XDG_RUNTIME_DIR, HOME and
// TMPDIR (in that order), or "/tmp" when none of them is set. the returned
// string is either the environment's own storage or a literal - the caller
// must not free it
static const char *
_socket_home_get()
{
   static const char *const candidates[] =
     {
        "XDG_RUNTIME_DIR", "HOME", "TMPDIR"
     };
   unsigned int i;

   for (i = 0; i < (sizeof(candidates) / sizeof(candidates[0])); i++)
     {
        const char *value = getenv(candidates[i]);

        if (value) return value;
     }
   // nothing in the environment - fall back to a fixed location
   return "/tmp";
}

// connect to efl_debugd over its unix socket. on success the connected fd
// is stored in _eina_debug_monitor_service_fd; on any failure (including a
// socket path that does not fit into sockaddr_un) the socket is closed
// again and _eina_debug_monitor_service_fd is left untouched
void
_eina_debug_monitor_service_connect(void)
{
   char buf[4096];
   int fd = -1, len, curstate = 0;
   socklen_t socket_unix_len;
   struct sockaddr_un socket_unix;

   // try this socket file - it will likely be:
   //   ~/.ecore/efl_debug/0
   // or maybe
   //   /var/run/UID/.ecore/efl_debug/0
   len = snprintf(buf, sizeof(buf), "%s/%s", _socket_home_get(), DEBUG_SERVER);
   // the path has to fit - with its terminating 0 byte - into both our
   // buffer and sun_path. a truncated path would name the wrong socket and
   // an unterminated sun_path would make the length calculation below read
   // past the end of the struct
   if ((len < 0) ||
       ((size_t)len >= sizeof(buf)) ||
       ((size_t)len >= sizeof(socket_unix.sun_path)))
     goto err;
   // create the socket
   fd = socket(AF_UNIX, SOCK_STREAM, 0);
   if (fd < 0) goto err;
   // set the socket to close when we exec things so they don't inherit it
   if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) goto err;
   // set up some socket options on addr re-use
   if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&curstate,
                  sizeof(curstate)) < 0)
     goto err;
   // say that it's a unix socket and where the path is. zero the struct
   // first so no uninitialized bytes are handed to the kernel
   memset(&socket_unix, 0, sizeof(socket_unix));
   socket_unix.sun_family = AF_UNIX;
   memcpy(socket_unix.sun_path, buf, (size_t)len + 1);
#define LENGTH_OF_SOCKADDR_UN(s) \
   (strlen((s)->sun_path) + offsetof(struct sockaddr_un, sun_path))
   socket_unix_len = (socklen_t)LENGTH_OF_SOCKADDR_UN(&socket_unix);
   // actually connect to efl_debugd service
   if (connect(fd, (struct sockaddr *)&socket_unix, socket_unix_len) < 0)
     goto err;
   // we succeeded - store fd
   _eina_debug_monitor_service_fd = fd;
   return;
err:
   // some kind of connection failure here, so close a valid socket and
   // get out of here
   if (fd >= 0) close(fd);
}
#endif