跳转到内容

Linux 应用程序调试技术/资源泄漏

来自维基教科书,开放的书籍,为开放的世界

僵尸线程

[编辑 | 编辑源代码]

任何已终止、但既没有被 join(pthread_join)也没有被分离(pthread_detach)的线程,都会一直占用操作系统资源,直到进程终止。不幸的是,至少在某些内核上,/proc 和 gdb 都不会显示这些僵尸线程。

找出它们的一种方法是使用一个 gdb 自定义命令:

# trace_call <location>
# Sets a breakpoint at <location> whose command list prints a full
# backtrace (with locals) and then resumes the program.
define trace_call
    b $arg0
    commands
    bt full
    continue
    end
end
document trace_call
Trace specified call with call stack to screen. Example:
    set breakpoint pending on
    set pagination off
    set logging on
    trace_call __pthread_create_2_1
end
Using host libthread_db library "/lib/i686/cmov/libthread_db.so.1".
(gdb) trace_call __pthread_create_2_1
Function "__pthread_create_2_1" not defined.
Breakpoint 1 (__pthread_create_2_1) pending.
(gdb) trace_call __pthread_create_2_0
Function "__pthread_create_2_0" not defined.
Breakpoint 2 (__pthread_create_2_0) pending.
(gdb) r
Starting program: /home/amelinte/projects/articole/wikibooks/debug/plock foo bar bax
[Thread debugging using libthread_db enabled]
Breakpoint 3 at 0xb7f9b746
Pending breakpoint "__pthread_create_2_1" resolved
Breakpoint 4 at 0xb7f9c395
Pending breakpoint "__pthread_create_2_0" resolved
[New Thread 0xb7e48ad0 (LWP 8635)]
[Switching to Thread 0xb7e48ad0 (LWP 8635)]

Breakpoint 3, 0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
#0  0xb7f9b746 in pthread_create@@GLIBC_2.1 () from /lib/i686/cmov/libpthread.so.0
No symbol table info available.
#1  0x08048a7f in main (argc=4, argv=0xbfceb714) at plock.c:97
        s = 0
        tnum = 0
        opt = -1
        num_threads = 3
        tinfo = (struct thread_info *) 0x833b008
        attr = {__size = '\0' <repeats 13 times>, "\020", '\0' <repeats 21 times>, __align = 0}
        stack_size = -1
        res = (void *) 0x0
[New Thread 0xb7e47b90 (LWP 8638)]
Thread 1: top of stack near 0xb7e473c8; argv_string=foo

另一种方法(同样)是使用一个中间层(interposition)库:

/*
 *  Hook library. Usage: 
 *    gcc -c -g -Wall -fPIC libhook.c -o libhook.o 
 *    ld -o libhook.so libhook.o -shared -ldl
 *    LD_PRELOAD=./libhook.so program arguments
 * 
 *  Copyright 2012 Aurelian Melinte. 
 *  Released under GPL 3.0 or later. 
 */

#define _GNU_SOURCE
#include <dlfcn.h>

#include <signal.h>
#include <execinfo.h>

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>  /*printf*/
#include <unistd.h>

#include <pthread.h>

#include <assert.h>



/* Pointer to a function taking a mutex and returning int.
 * Not referenced by the code visible in this file. */
typedef int (*lp_pthread_mutex_func)(pthread_mutex_t *mutex);

/* Pointer type with the same signature as pthread_create(). */
typedef int (*pthread_create_func)(pthread_t *thread,
                                   const pthread_attr_t *attr,
                                   void *(*start_routine)(void *),
                                   void *arg);

/* Next pthread_create() in symbol search order; NULL until resolved. */
static pthread_create_func _pthread_create_hook = NULL;


/*
 * Look up the next definition of `fname` (RTLD_NEXT) and store it in
 * *fptr, unless *fptr is already set.
 *
 * fptr   in/out slot for the resolved function pointer; must not be NULL.
 * fname  symbol name to resolve; must not be NULL.
 *
 * Returns 0 when *fptr was already set or the lookup succeeded,
 * -1 when dlsym() yielded NULL or dlerror() reported an error.
 * Progress and failures are printed to stdout.
 */
static int
hook_one(pthread_create_func *fptr, const char *fname)
{
    const char *msg = NULL;

    assert(fptr != NULL);
    assert(fname != NULL);

    if (*fptr != NULL) {
        return 0;   /* already resolved, nothing to do */
    }

    printf("dlsym : wrapping %s\n", fname);

    /* Drop any stale error so the dlerror() below reports on this dlsym() only. */
    (void) dlerror();
    *fptr = dlsym(RTLD_NEXT, fname);
    /* Fetch the error unconditionally; a short-circuited check would leave
     * msg NULL exactly in the case where the message is needed. */
    msg = dlerror();

    printf("next_%s = %p\n", fname, (void *) *fptr);

    if (*fptr == NULL || msg != NULL) {
        /* Never hand a NULL pointer to %s. */
        printf("dlsym %s failed : %s\n", fname,
               msg != NULL ? msg : "symbol resolved to NULL");
        return -1;
    }

    printf("dlsym: wrapping %s done\n", fname);
    return 0;
}


/*
 * Ensure _pthread_create_hook is resolved. Does nothing when it is
 * already set; prints a message and exits with EXIT_FAILURE when
 * hook_one() fails or leaves the pointer NULL.
 */
static void
hook_funcs(void)
{
    int status;

    if (_pthread_create_hook != NULL) {
        return;
    }

    status = hook_one(&_pthread_create_hook, "pthread_create");
    if (status == 0 && _pthread_create_hook != NULL) {
        return;
    }

    printf("Failed to hook.\n");
    exit(EXIT_FAILURE);
}


/*
 *
 */
 

int 
pthread_create(pthread_t *thread, 
               const pthread_attr_t *attr,
               void *(*start_routine) (void *), void *arg)
{
#define SIZE 40
    void *buffer[SIZE] = {0};
	int nptrs = 0;

    int rc = EINVAL; 
	
	rc = _pthread_create_hook(thread, attr, start_routine, arg);

    printf("*** pthread_create:\n");
    nptrs = backtrace(buffer, SIZE);
    backtrace_symbols_fd(buffer, nptrs, STDOUT_FILENO);

    return rc; 
}

/*
 *
 */
 
/* Constructor: prints a marker line, then resolves the hooked symbols. */
__attribute__((constructor)) void _init(void);

void
_init(void)
{
    printf("*** _init().\n");
    hook_funcs();
}


/* Destructor: only prints a marker line. */
__attribute__((destructor)) void _fini(void);

void
_fini(void)
{
    printf("*** _fini().\n");
}

输出有点粗糙,但可以把 backtrace_symbols_fd() 替换为适当的代码,使输出精确到文件名和行号:

*** pthread_create:
./libhook.so(pthread_create+0x8c)[0x400215d3]
./plock[0x8048a7f]
/lib/i686/cmov/libc.so.6(__libc_start_main+0xe0)[0x4006f450]
./plock[0x8048791]

文件描述符

[编辑 | 编辑源代码]

由于几乎所有东西都是文件(文件夹、套接字、管道等),因此几乎所有东西都可能导致需要关闭的文件描述符。/proc可以帮助

# tree /proc/26041
/proc/26041
...
|-- fd                  # Open files descriptors
|   |-- 0 -> /dev/pts/21
|   |-- 1 -> /dev/pts/21
|   |-- 2 -> /dev/pts/21
|   `-- 3 -> socket:[113497835]
|-- fdinfo
|   |-- 0
|   |-- 1
|   |-- 2
|   `-- 3
...

用于 gdb 的 trace_call 命令可以帮助查看调用堆栈。

如果机器上没有 gdb,则可以构建一个中间层库,挂钩 open()、pipe()、socket() 等函数。

其他可用的工具

  • lsof
  • fuser

哪个进程正在使用某个端口?以 root 身份运行:

# netstat -tlnp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address               Foreign Address             State       PID/Program name   
tcp        0      0 0.0.0.0:36510               0.0.0.0:*                   LISTEN      -                   
tcp        0      0 127.0.0.1:2207              0.0.0.0:*                   LISTEN      3438/python         
...
# lsof
COMMAND     PID             USER   FD      TYPE             DEVICE       SIZE       NODE NAME
init          1             root  cwd       DIR              253,0       4096          2 /
...
python     3438             root    4u     IPv4              11416                   TCP localhost.localdomain:2207 (LISTEN)

# lsof -i :2207
COMMAND  PID USER   FD   TYPE DEVICE SIZE NODE NAME
python  3438 root    4u  IPv4  11416       TCP localhost.localdomain:2207 (LISTEN)

其他工具

  • fuser

用于信号量、共享内存和消息队列。

  • ipcs
  • ipcrm
# ipcs -spt
------ Semaphore Operation/Change Times --------
semid    owner      last-op                    last-changed              
187826177 aurelian_m  Fri Feb 10 09:37:26 2012   Fri Feb 10 09:33:39 2012  
187858946 aurelian_m  Fri Feb 10 09:52:11 2012   Fri Feb 10 09:50:44 2012

DIY:一个中间层资源计数器

[编辑 | 编辑源代码]

libmemleak 可以很容易地修改,用来跟踪任何资源的泄漏:只需挂钩相应的 API(例如 open()/close())。

华夏公益教科书