在研究这个问题时,我想到一个可能的办法是使用 ptrace,但我没能弄清楚 ptrace 与线程是如何交互的。
ptrace
假设我有一个给定的多线程主进程,并且我想附加到其中的特定线程(可能来自派生的子进程)。
我可以附加到特定线程吗?(有关此问题的手册有所不同。)
如果是这样,是否意味着单步执行仅一步步执行该线程的指令?它会停止所有进程的线程吗?
如果是这样,在我调用 PTRACE_SYSCALL 或 PTRACE_SINGLESTEP 时,其他所有线程是否仍保持停止状态?还是所有线程都会继续运行?有没有一种方法可以只让一个线程前进,同时确保其他线程保持停止状态?
PTRACE_SYSCALL
PTRACE_SINGLESTEP
基本上,我想通过强制所有线程停止来同步原始程序,然后仅通过单步执行一个被跟踪的线程来执行一小套单线程指令。
到目前为止,我的个人尝试看起来像这样:
pid_t target = syscall(SYS_gettid); // get the calling thread's ID pid_t pid = fork(); if (pid > 0) { waitpid(pid, NULL, 0); // synchronise main process important_instruction(); } else if (pid == 0) { ptrace(target, PTRACE_ATTACH, NULL, NULL); // does this work? // cancel parent's "waitpid" call, e.g. with a signal // single-step to execute "important_instruction()" above ptrace(target, PTRACE_DETACH, NULL, NULL); // parent's threads resume? _Exit(0); }
但是,我不确定(也找不到合适的参考资料)这种做法在并发上是否正确,以及 important_instruction() 是否保证仅在所有其他线程都停止时才执行。我还了解到,当父进程从其他地方收到信号时,可能会出现竞态条件,并且我听说应该改用 PTRACE_SEIZE,但它似乎并非在所有系统上都可用。
important_instruction()
PTRACE_SEIZE
任何澄清或参考将不胜感激!
我写了第二个测试用例。我不得不添加一个单独的答案,因为它太长了,无法放入包含示例输出的第一个答案。
首先,这里是tracer.c:
tracer.c
#include <unistd.h> #include <stdlib.h> #include <sys/types.h> #include <sys/ptrace.h> #include <sys/prctl.h> #include <sys/wait.h> #include <sys/user.h> #include <dirent.h> #include <string.h> #include <signal.h> #include <errno.h> #include <stdio.h> #ifndef SINGLESTEPS #define SINGLESTEPS 10 #endif /* Similar to getline(), except gets process pid task IDs. * Returns positive (number of TIDs in list) if success, * otherwise 0 with errno set. */ size_t get_tids(pid_t **const listptr, size_t *const sizeptr, const pid_t pid) { char dirname[64]; DIR *dir; pid_t *list; size_t size, used = 0; if (!listptr || !sizeptr || pid < (pid_t)1) { errno = EINVAL; return (size_t)0; } if (*sizeptr > 0) { list = *listptr; size = *sizeptr; } else { list = *listptr = NULL; size = *sizeptr = 0; } if (snprintf(dirname, sizeof dirname, "/proc/%d/task/", (int)pid) >= (int)sizeof dirname) { errno = ENOTSUP; return (size_t)0; } dir = opendir(dirname); if (!dir) { errno = ESRCH; return (size_t)0; } while (1) { struct dirent *ent; int value; char dummy; errno = 0; ent = readdir(dir); if (!ent) break; /* Parse TIDs. Ignore non-numeric entries. */ if (sscanf(ent->d_name, "%d%c", &value, &dummy) != 1) continue; /* Ignore obviously invalid entries. */ if (value < 1) continue; /* Make sure there is room for another TID. */ if (used >= size) { size = (used | 127) + 128; list = realloc(list, size * sizeof list[0]); if (!list) { closedir(dir); errno = ENOMEM; return (size_t)0; } *listptr = list; *sizeptr = size; } /* Add to list. */ list[used++] = (pid_t)value; } if (errno) { const int saved_errno = errno; closedir(dir); errno = saved_errno; return (size_t)0; } if (closedir(dir)) { errno = EIO; return (size_t)0; } /* None? */ if (used < 1) { errno = ESRCH; return (size_t)0; } /* Make sure there is room for a terminating (pid_t)0. 
*/ if (used >= size) { size = used + 1; list = realloc(list, size * sizeof list[0]); if (!list) { errno = ENOMEM; return (size_t)0; } *listptr = list; *sizeptr = size; } /* Terminate list; done. */ list[used] = (pid_t)0; errno = 0; return used; } static int wait_process(const pid_t pid, int *const statusptr) { int status; pid_t p; do { status = 0; p = waitpid(pid, &status, WUNTRACED | WCONTINUED); } while (p == (pid_t)-1 && errno == EINTR); if (p != pid) return errno = ESRCH; if (statusptr) *statusptr = status; return errno = 0; } static int continue_process(const pid_t pid, int *const statusptr) { int status; pid_t p; do { if (kill(pid, SIGCONT) == -1) return errno = ESRCH; do { status = 0; p = waitpid(pid, &status, WUNTRACED | WCONTINUED); } while (p == (pid_t)-1 && errno == EINTR); if (p != pid) return errno = ESRCH; } while (WIFSTOPPED(status)); if (statusptr) *statusptr = status; return errno = 0; } void show_registers(FILE *const out, pid_t tid, const char *const note) { struct user_regs_struct regs; long r; do { r = ptrace(PTRACE_GETREGS, tid, ®s, ®s); } while (r == -1L && errno == ESRCH); if (r == -1L) return; #if (defined(__x86_64__) || defined(__i386__)) && __WORDSIZE == 64 if (note && *note) fprintf(out, "Task %d: RIP=0x%016lx, RSP=0x%016lx. %s\n", (int)tid, regs.rip, regs.rsp, note); else fprintf(out, "Task %d: RIP=0x%016lx, RSP=0x%016lx.\n", (int)tid, regs.rip, regs.rsp); #elif (defined(__x86_64__) || defined(__i386__)) && __WORDSIZE == 32 if (note && *note) fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx. 
%s\n", (int)tid, regs.eip, regs.esp, note); else fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx.\n", (int)tid, regs.eip, regs.esp); #endif } int main(int argc, char *argv[]) { pid_t *tid = 0; size_t tids = 0; size_t tids_max = 0; size_t t, s; long r; pid_t child; int status; if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]); fprintf(stderr, " %s COMMAND [ ARGS ... ]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "This program executes COMMAND in a child process,\n"); fprintf(stderr, "and waits for it to stop (via a SIGSTOP signal).\n"); fprintf(stderr, "When that occurs, the register state of each thread\n"); fprintf(stderr, "is dumped to standard output, then the child process\n"); fprintf(stderr, "is sent a SIGCONT signal.\n"); fprintf(stderr, "\n"); return 1; } child = fork(); if (child == (pid_t)-1) { fprintf(stderr, "fork() failed: %s.\n", strerror(errno)); return 1; } if (!child) { prctl(PR_SET_DUMPABLE, (long)1); prctl(PR_SET_PTRACER, (long)getppid()); fflush(stdout); fflush(stderr); execvp(argv[1], argv + 1); fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno)); return 127; } fprintf(stderr, "Tracer: Waiting for child (pid %d) events.\n\n", (int)child); fflush(stderr); while (1) { /* Wait for a child event. */ if (wait_process(child, &status)) break; /* Exited? */ if (WIFEXITED(status) || WIFSIGNALED(status)) { errno = 0; break; } /* At this point, only stopped events are interesting. */ if (!WIFSTOPPED(status)) continue; /* Obtain task IDs. */ tids = get_tids(&tid, &tids_max, child); if (!tids) break; printf("Process %d has %d tasks,", (int)child, (int)tids); fflush(stdout); /* Attach to all tasks. 
*/ for (t = 0; t < tids; t++) { do { r = ptrace(PTRACE_ATTACH, tid[t], (void *)0, (void *)0); } while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH)); if (r == -1L) { const int saved_errno = errno; while (t-->0) do { r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0); } while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH)); tids = 0; errno = saved_errno; break; } } if (!tids) { const int saved_errno = errno; if (continue_process(child, &status)) break; printf(" failed to attach (%s).\n", strerror(saved_errno)); fflush(stdout); if (WIFCONTINUED(status)) continue; errno = 0; break; } printf(" attached to all.\n\n"); fflush(stdout); /* Dump the registers of each task. */ for (t = 0; t < tids; t++) show_registers(stdout, tid[t], ""); printf("\n"); fflush(stdout); for (s = 0; s < SINGLESTEPS; s++) { do { r = ptrace(PTRACE_SINGLESTEP, tid[tids-1], (void *)0, (void *)0); } while (r == -1L && errno == ESRCH); if (!r) { for (t = 0; t < tids - 1; t++) show_registers(stdout, tid[t], ""); show_registers(stdout, tid[tids-1], "Advanced by one step."); printf("\n"); fflush(stdout); } else { fprintf(stderr, "Single-step failed: %s.\n", strerror(errno)); fflush(stderr); } } /* Detach from all tasks. */ for (t = 0; t < tids; t++) do { r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0); } while (r == -1 && (errno == EBUSY || errno == EFAULT || errno == ESRCH)); tids = 0; if (continue_process(child, &status)) break; if (WIFCONTINUED(status)) { printf("Detached. Waiting for new stop events.\n\n"); fflush(stdout); continue; } errno = 0; break; } if (errno) fprintf(stderr, "Tracer: Child lost (%s)\n", strerror(errno)); else if (WIFEXITED(status)) fprintf(stderr, "Tracer: Child exited (%d)\n", WEXITSTATUS(status)); else if (WIFSIGNALED(status)) fprintf(stderr, "Tracer: Child died from signal %d\n", WTERMSIG(status)); else fprintf(stderr, "Tracer: Child vanished\n"); fflush(stderr); return status; }
tracer.c 执行指定的命令,并等待该命令收到 SIGSTOP 信号。(tracer.c 本身不会发送该信号;您可以让被跟踪的进程自己停止自己,也可以从外部发送该信号。)
SIGSTOP
命令停止后,tracer.c将ptrace附加到每个线程,并以固定数量的步长(SINGLESTEPS编译时常数)单步执行其中一个线程,从而显示每个线程的相关寄存器状态。
SINGLESTEPS
之后,它将与命令分离,并向其发送SIGCONT信号以使其继续正常运行。
SIGCONT
这是一个简单的测试程序worker.c,我用于测试:
worker.c
#include <pthread.h> #include <signal.h> #include <string.h> #include <errno.h> #include <stdio.h> #ifndef THREADS #define THREADS 2 #endif volatile sig_atomic_t done = 0; void catch_done(int signum) { done = signum; } int install_done(const int signum) { struct sigaction act; sigemptyset(&act.sa_mask); act.sa_handler = catch_done; act.sa_flags = 0; if (sigaction(signum, &act, NULL)) return errno; else return 0; } void *worker(void *data) { volatile unsigned long *const counter = data; while (!done) __sync_add_and_fetch(counter, 1UL); return (void *)(unsigned long)__sync_or_and_fetch(counter, 0UL); } int main(void) { unsigned long counter = 0UL; pthread_t thread[THREADS]; pthread_attr_t attrs; size_t i; if (install_done(SIGHUP) || install_done(SIGTERM) || install_done(SIGUSR1)) { fprintf(stderr, "Worker: Cannot install signal handlers: %s.\n", strerror(errno)); return 1; } pthread_attr_init(&attrs); pthread_attr_setstacksize(&attrs, 65536); for (i = 0; i < THREADS; i++) if (pthread_create(&thread[i], &attrs, worker, &counter)) { done = 1; fprintf(stderr, "Worker: Cannot create thread: %s.\n", strerror(errno)); return 1; } pthread_attr_destroy(&attrs); /* Let the original thread also do the worker dance. */ worker(&counter); for (i = 0; i < THREADS; i++) pthread_join(thread[i], NULL); return 0; }
例如,可以这样编译两者:
# Build the multithreaded test program (needs -pthread) ...
gcc -W -Wall -O3 -fomit-frame-pointer worker.c -pthread -o worker
# ... and the tracer.
gcc -W -Wall -O3 -fomit-frame-pointer tracer.c -o tracer
并在单独的终端或后台运行,例如
./tracer ./worker &
跟踪器显示工作者的PID:
Tracer: Waiting for child (pid 24275) events.
此时,子进程正在正常运行。当您向子进程发送 SIGSTOP 时,好戏才开始:跟踪器检测到停止事件,执行所需的跟踪操作,然后分离并让子进程继续正常运行:
kill -STOP 24275 Process 24275 has 3 tasks, attached to all. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. 
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step. Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428. Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8. Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step. Detached. Waiting for new stop events.
您可以根据需要多次重复上述操作。请注意,我选择 SIGSTOP 信号作为触发器,是因为这样一来,tracer.c 还可以用作按需生成复杂的多线程核心转储的基础(多线程进程只需向自身发送一个 SIGSTOP 就能触发它)。
在上面的示例中,所有线程都在 worker() 函数中自旋,该函数的反汇编如下:
worker()
0x400a50: eb 0b jmp 0x400a5d 0x400a52: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 0x400a58: f0 48 83 07 01 lock addq $0x1,(%rdi) = fourth step 0x400a5d: 8b 05 00 00 00 00 mov 0x0(%rip),%eax = first step 0x400a63: 85 c0 test %eax,%eax = second step 0x400a65: 74 f1 je 0x400a58 = third step 0x400a67: 48 8b 07 mov (%rdi),%rax 0x400a6a: 48 89 c2 mov %rax,%rdx 0x400a6d: f0 48 0f b1 07 lock cmpxchg %rax,(%rdi) 0x400a72: 75 f6 jne 0x400a6a 0x400a74: 48 89 d0 mov %rdx,%rax 0x400a77: c3 retq
现在,此测试程序仅显示了如何停止进程,将其附加到其所有线程,单步执行所需数量的指令,然后让所有线程正常继续; 尚 不能证明同样适用于让特定线程正常继续(通过PTRACE_CONT)。但是,我在下面描述的细节向我表明,相同的方法应适用于PTRACE_CONT。
PTRACE_CONT
我在编写上述测试程序时遇到的主要问题(或者说意外),是必须使用如下形式的
long r;

/* Repeat the ptrace request for as long as it fails with
 * EBUSY, EFAULT, or ESRCH. */
do {
    r = ptrace(PTRACE_cmd, tid, ...);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
循环,尤其是针对 ESRCH 的情况(其他几种 errno 值只是根据 ptrace 手册页的描述才加上的)。
ESRCH
您看,大多数 ptrace 命令只有在任务处于停止状态时才被允许。但是,当任务仍在执行某条命令(例如单步命令)而尚未完成时,它并未处于停止状态。因此,使用上述循环(可以加上毫秒级的 nanosleep 或类似操作,以避免浪费 CPU)可以确保在尝试发出新的 ptrace 命令之前,前一条命令已经完成(因而任务已经停止)。
Kerrek SB,我相信您在测试程序中遇到的麻烦,至少有一部分是由这个问题引起的吧?对我个人而言,这是一个恍然大悟(D’oh!)的时刻:这样做当然是必要的,因为 ptrace 跟踪本质上是异步的,而不是同步的。
(这种异步性也是我上面提到的 SIGCONT 与 PTRACE_CONT 相互作用的原因。我相信,只要使用上面所示的循环妥善处理,这种相互作用就不再是问题,而且实际上完全可以理解。)
在此答案的注释中添加:
Linux 内核在 task_struct 结构中使用一组任务状态标志(其定义参见 include/linux/sched.h)来跟踪每个任务的状态。ptrace() 面向用户空间的一侧定义在 kernel/ptrace.c 中。
include/linux/sched.h
ptrace()
kernel/ptrace.c
当调用 PTRACE_SINGLESTEP 或 PTRACE_CONT 时,大部分细节由 kernel/ptrace.c:ptrace_continue() 处理。它最后会调用 wake_up_state(child, __TASK_TRACED)(即 kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0))。
ptrace_continue()
wake_up_state(child, __TASK_TRACED)
kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0)
当通过SIGSTOP信号停止一个进程时,所有任务将被停止,并最终处于 “已停止,未跟踪” 状态。
附加到每一个任务(通过 PTRACE_ATTACH 或 PTRACE_SEIZE,请参阅 kernel/ptrace.c:ptrace_attach())会修改任务状态。然而,ptrace 状态位(参见 include/linux/ptrace.h 中的 PT_ 常量)与任务的可运行状态位(参见 include/linux/sched.h 中的 TASK_ 常量)是相互独立的。
ptrace_attach()
include/linux/ptrace.h:PT_
include/linux/sched.h:TASK_
在附加到各任务并向进程发送 SIGCONT 信号之后,(我相信)停止状态不会立即被修改,因为这些任务同时也正处于被跟踪状态。执行 PTRACE_SINGLESTEP 或 PTRACE_CONT 最终会进入 kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0),它会更新任务状态,并将任务移至运行队列。
目前我尚未找到代码路径的复杂之处在于:任务下次被调度时,内核是如何更新其任务状态的。我的测试表明,使用单步执行(它是另一个任务状态标志)时,只有任务状态被更新,同时单步标志被清除。PTRACE_CONT 似乎没有这么可靠;我相信这是因为单步标志“强制”了任务状态的改变。也许在 SIGCONT 信号的递送与状态改变之间存在“竞态条件”?
(进一步编辑:内核开发人员显然期望调用方使用 wait(),例如参见此讨论串。)
wait()
In other words, after noticing that the process has stopped (note that you can use /proc/PID/stat or /proc/PID/status if the process is not a child, and not yet attached to), I believe the following procedure is the most robust one:
/proc/PID/stat
/proc/PID/status
pid_t pid, p; /* Process owning the tasks */ tid_t *tid; /* Task ID array */ size_t tids; /* Tasks */ long result; int status; size_t i; for (i = 0; i < tids; i++) { while (1) { result = ptrace(PTRACE_ATTACH, tid[i], (void *)0, (void *)0); if (result == -1L && (errno == ESRCH || errno == EBUSY || errno == EFAULT || errno == EIO)) { /* To avoid burning up CPU for nothing: */ sched_yield(); /* or nanosleep(), or usleep() */ continue; } break; } if (result == -1L) { /* * Fatal error. First detach from tid[0..i-1], then exit. */ } } /* Send SIGCONT to the process. */ if (kill(pid, SIGCONT)) { /* * Fatal error, see errno. Exit. */ } /* Since we are attached to the process, * we can wait() on it. */ while (1) { errno = 0; status = 0; p = waitpid(pid, &status, WCONTINUED); if (p == (pid_t)-1) { if (errno == EINTR) continue; else break; } else if (p != pid) { errno = ESRCH; break; } else if (WIFCONTINUED(status)) { errno = 0; break; } } if (errno) { /* * Fatal error. First detach from tid[0..tids-1], then exit. */ } /* Single-step each task to update the task states. */ for (i = 0; i < tids; i++) { while (1) { result = ptrace(PTRACE_SINGLESTEP, tid[i], (void *)0, (void *)0); if (result == -1L && errno == ESRCH) { /* To avoid burning up CPU for nothing: */ sched_yield(); /* or nanosleep(), or usleep() */ continue; } break; } if (result == -1L) { /* * Fatal error. First detach from tid[0..i-1], then exit. */ } } /* Obtain task register structures, to make sure the single-steps * have completed and their states have stabilized. */ for (i = 0; i < tids; i++) { struct user_regs_struct regs; while (1) { result = ptrace(PTRACE_GETREGS, tid[i], ®s, ®s); if (result == -1L && (errno == ESRCH || errno == EBUSY || errno == EFAULT || errno == EIO)) { /* To avoid burning up CPU for nothing: */ sched_yield(); /* or nanosleep(), or usleep() */ continue; } break; } if (result == -1L) { /* * Fatal error. First detach from tid[0..i-1], then exit. */ } }
After the above, all tasks should be attached and in the expected state, so that e.g. PTRACE_CONT works without further tricks.
If the behaviour changes in future kernels – I do believe the interaction between the STOP/CONT signals and ptracing is something that might change; at least a question to the LKML developers about this behaviour would be warranted! –, the above procedure will still work robustly. (Erring on the side of caution, by using a loop to PTRACE_SINGLESTEP a few times, might also be a good idea.)
The difference to PTRACE_CONT is that if the behaviour changes in the future, the initial PTRACE_CONT might actually continue the process, causing the ptrace() that follow it to fail. With PTRACE_SINGLESTEP, the process will stop, allowing further ptrace() calls to succeed.
Questions?