Linux 应用程序调试技术/资源泄漏
外观
任何已终止、但既未被 join(汇合)也未被 detach(分离)的线程都会泄漏操作系统资源,直到进程终止。不幸的是,至少在某些内核上,/proc 和 gdb 都不会显示这些僵尸线程。
一种找出它们的方法是使用 gdb 的用户自定义命令(canned command):
#
# trace_call <location>
# Sets a breakpoint at <location> ($arg0) and attaches commands to it that
# print a full backtrace (including locals) and then resume the program.
#
define trace_call
b $arg0
commands
bt full
continue
end
end
# Help text shown by "help trace_call".
document trace_call
Trace specified call with call stack to screen. Example:
set breakpoint pending on
set pagination off
set logging on
trace_call __pthread_create_2_1
end
Using host libthread_db library "/lib/i686/cmov/libthread_db.so.1".
(gdb) trace_call __pthread_create_2_1
Function "__pthread_create_2_1" not defined.
Breakpoint 1 (__pthread_create_2_1) pending.
(gdb) trace_call __pthread_create_2_0
Function "__pthread_create_2_0" not defined.
Breakpoint 2 (__pthread_create_2_0) pending.
(gdb) r
Starting program: /home/amelinte/projects/articole/wikibooks/debug/plock foo bar bax
[Thread debugging using libthread_db enabled]
Breakpoint 3 at 0xb7f9b746
Pending breakpoint "__pthread_create_2_1" resolved
Breakpoint 4 at 0xb7f9c395
Pending breakpoint "__pthread_create_2_0" resolved
[New Thread 0xb7e48ad0 (LWP 8635)]
[Switching to Thread 0xb7e48ad0 (LWP 8635)]
Breakpoint 3, 0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
#0 0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
No symbol table info available.
#1 0x08048a7f in main (argc=4, argv=0xbfceb714) at plock.c:97
s = 0
tnum = 0
opt = -1
num_threads = 3
tinfo = (struct thread_info *) 0x833b008
attr = {__size = '\0' <repeats 13 times>, "\020", '\0' <repeats 21 times>, __align = 0}
stack_size = -1
res = (void *) 0x0
[New Thread 0xb7e47b90 (LWP 8638)]
Thread 1: top of stack near 0xb7e473c8; argv_string=foo
另一种方法是(同样)使用一个插桩(interposition)库:
/*
* Hook library. Usage:
* gcc -c -g -Wall -fPIC libhook.c -o libhook.o
* ld -o libhook.so libhook.o -shared -ldl
* LD_PRELOAD=./libhook.so program arguments
*
* Copyright 2012 Aurelian Melinte.
* Released under GPL 3.0 or later.
*/
#define _GNU_SOURCE
#include <dlfcn.h>
#include <signal.h>
#include <execinfo.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h> /*printf*/
#include <unistd.h>
#include <pthread.h>
#include <assert.h>
/* Signature shared by functions taking a single pthread_mutex_t pointer.
 * NOTE(review): not referenced by the code visible in this file - confirm
 * whether it is still needed. */
typedef int (*lp_pthread_mutex_func)(pthread_mutex_t *mutex);
/* Pointer type with the same signature as pthread_create(). */
typedef int (*pthread_create_func)(pthread_t *thread,
const pthread_attr_t *attr,
void *(*start_routine) (void *), void *arg);
/* Next pthread_create() in library search order; NULL until hook_one()
 * has resolved it. */
static pthread_create_func _pthread_create_hook = NULL;
/*
 * Resolve the next definition of `fname` in library search order
 * (dlsym(RTLD_NEXT, ...)) and store it in *fptr.
 *
 * fptr   in/out slot for the resolved function pointer. If it already holds
 *        a non-NULL pointer nothing is looked up and 0 is returned.
 * fname  name of the symbol to look up; must not be NULL.
 *
 * Returns 0 when *fptr holds a usable pointer, -1 when the lookup failed.
 * Progress and failure messages are printed to stdout.
 */
static int
hook_one(pthread_create_func *fptr, const char *fname)
{
    const char *msg = NULL;

    assert(fptr != NULL);
    assert(fname != NULL);

    if (*fptr != NULL) {
        return 0;
    }

    printf("dlsym : wrapping %s\n", fname);

    /* Discard any stale error so the dlerror() call below reports on this
     * dlsym() only. */
    (void) dlerror();
    *fptr = (pthread_create_func) dlsym(RTLD_NEXT, fname);
    /* Fetch the error text unconditionally: it is needed precisely when
     * dlsym() returned NULL. */
    msg = dlerror();

    printf("next_%s = %p\n", fname, (void *) *fptr);

    if (*fptr == NULL || msg != NULL) {
        /* Never hand a NULL pointer to %s. */
        printf("dlsym %s failed : %s\n", fname,
               msg != NULL ? msg : "symbol resolved to NULL");
        return -1;
    }

    printf("dlsym: wrapping %s done\n", fname);
    return 0;
}
/*
 * Make sure _pthread_create_hook points at the next pthread_create().
 * Does nothing when the hook is already set; otherwise resolves it through
 * hook_one() and terminates the process with EXIT_FAILURE if that fails.
 */
static void
hook_funcs(void)
{
    int status;

    /* Already resolved: nothing to do. */
    if (_pthread_create_hook != NULL) {
        return;
    }

    status = hook_one(&_pthread_create_hook, "pthread_create");
    if (status == 0 && _pthread_create_hook != NULL) {
        return;
    }

    printf("Failed to hook.\n");
    exit(EXIT_FAILURE);
}
/*
*
*/
int
pthread_create(pthread_t *thread,
const pthread_attr_t *attr,
void *(*start_routine) (void *), void *arg)
{
#define SIZE 40
void *buffer[SIZE] = {0};
int nptrs = 0;
int rc = EINVAL;
rc = _pthread_create_hook(thread, attr, start_routine, arg);
printf("*** pthread_create:\n");
nptrs = backtrace(buffer, SIZE);
backtrace_symbols_fd(buffer, nptrs, STDOUT_FILENO);
return rc;
}
/*
 * Library load hook: declared with the constructor attribute, prints a
 * banner and resolves the wrapped functions through hook_funcs().
 *
 * NOTE(review): `_init` is also the traditional name of the ELF
 * initialisation entry point, which is presumably why the usage notes at
 * the top of this file link with `ld` rather than `gcc` - confirm on the
 * target toolchain that this function is not run twice.
 */
void _init() __attribute__((constructor));
void
_init()
{
printf("*** _init().\n");
hook_funcs();
}
/*
 * Library unload hook: declared with the destructor attribute; only prints
 * a banner.
 */
void _fini() __attribute__((destructor));
void
_fini()
{
printf("*** _fini().\n");
}
输出有点粗糙,但可以把 backtrace_symbols_fd() 替换为适当的代码,将其细化到文件名和行号:
*** pthread_create:
./libhook.so(pthread_create+0x8c)[0x400215d3]
./plock[0x8048a7f]
/lib/i686/cmov/libc.so.6(__libc_start_main+0xe0)[0x4006f450]
./plock[0x8048791]
由于几乎所有东西都是文件(目录、套接字、管道等),因此几乎任何操作都可能产生需要关闭的文件描述符。/proc 可以帮助查看:
# tree /proc/26041
/proc/26041
...
|-- fd # Open files descriptors
| |-- 0 -> /dev/pts/21
| |-- 1 -> /dev/pts/21
| |-- 2 -> /dev/pts/21
| `-- 3 -> socket:[113497835]
|-- fdinfo
| |-- 0
| |-- 1
| |-- 2
| `-- 3
...
前面为 gdb 定义的 trace_call 命令可以帮助查看调用堆栈。
如果机器上没有 gdb,则可以构建一个挂钩 open()、pipe()、socket() 等函数的中间层库。
其他可用的工具
- lsof
- fuser
哪个进程正在使用某个端口?以 root 身份运行:
# netstat -tlnp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:36510 0.0.0.0:* LISTEN -
tcp 0 0 127.0.0.1:2207 0.0.0.0:* LISTEN 3438/python
...
# lsof
COMMAND PID USER FD TYPE DEVICE SIZE NODE NAME
init 1 root cwd DIR 253,0 4096 2 /
...
python 3438 root 4u IPv4 11416 TCP localhost.localdomain:2207 (LISTEN)
# lsof -i :2207
COMMAND PID USER FD TYPE DEVICE SIZE NODE NAME
python 3438 root 4u IPv4 11416 TCP localhost.localdomain:2207 (LISTEN)
其他工具
- fuser
用于信号量、共享内存和消息队列。
- ipcs
- ipcrm
# ipcs -spt
------ Semaphore Operation/Change Times --------
semid owner last-op last-changed
187826177 aurelian_m Fri Feb 10 09:37:26 2012 Fri Feb 10 09:33:39 2012
187858946 aurelian_m Fri Feb 10 09:52:11 2012 Fri Feb 10 09:50:44 2012
libmemleak 可以很容易地修改,用来跟踪任何资源的泄漏:只需挂钩相应的 API(例如 open()/close())。