我正在尝试使用 gobpf,在计算被跟踪的用户空间函数的执行时长时遇到了问题。我用 bpf_ktime_get_ns() 读取时间,然后计算进入和返回之间的差值,但得到的数值大得离谱,而被跟踪的函数实际上只休眠了 1 秒钟。
这是被测试的 C 程序,其中被跟踪的函数名为 “ameba”。
#include <stdio.h>
#include <strings.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

char * ameba(char * s1, char * s2);

/*
 * Test target for the tracer: prints a wall-clock timestamp, calls ameba()
 * once, prints the result, then prints a second timestamp.
 */
int main(void)
{
    time_t rawtime;
    struct tm * timeinfo;
    char * phrase;

    time(&rawtime);
    timeinfo = localtime(&rawtime);
    printf("enter: %s", asctime(timeinfo));

    phrase = ameba("lessqqmorepewpew", "good luck, have fun");
    if (phrase == NULL) {
        /* ameba() could not allocate its result buffer. */
        fprintf(stderr, "ameba: out of memory\n");
        return EXIT_FAILURE;
    }
    printf("%s\n", phrase);
    free(phrase); /* the buffer is owned by the caller */

    time(&rawtime);
    timeinfo = localtime(&rawtime);
    printf("return: %s", asctime(timeinfo));

    return 0;
}

/*
 * ameba - the function being traced.
 *
 * Allocates a 128-byte buffer, sleeps for one second, and formats both
 * arguments into the buffer.
 *
 * Returns the heap-allocated string, which the caller must free(), or NULL
 * if the allocation failed. The function has external linkage; keep it that
 * way if it is to be looked up by its symbol name.
 */
char * ameba(char * s1, char * s2)
{
    char * s = malloc(128);

    if (s == NULL)
        return NULL;

    sleep(1);
    snprintf(s, 128, "phrase1: %s, phrase2: %s", s1, s2);

    return s;
}
Go 代码:
package main import ( "bytes" "encoding/binary" "fmt" "os" "os/signal" "time" bpf "github.com/iovisor/gobpf/bcc" ) const source string = ` #include <uapi/linux/ptrace.h> #include <linux/sched.h> struct val_t { u32 pid; char comm[TASK_COMM_LEN]; u64 ts; }; struct data_t { u32 pid; char comm[TASK_COMM_LEN]; u64 delta; }; BPF_HASH(start, u32, struct val_t); BPF_PERF_OUTPUT(ameba_events); int do_entry(struct pt_regs *ctx) { if (!PT_REGS_PARM1(ctx)) return 0; struct val_t val = {}; u32 pid = bpf_get_current_pid_tgid(); if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) { val.pid = bpf_get_current_pid_tgid(); val.ts = bpf_ktime_get_ns(); start.update(&pid, &val); } return 0; } int do_return(struct pt_regs *ctx) { struct val_t *valp; struct data_t data = {}; u64 delta; u32 pid = bpf_get_current_pid_tgid(); u64 tsp = bpf_ktime_get_ns(); valp = start.lookup(&pid); if (valp == 0) return 0; // missed start bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm); data.pid = valp->pid; data.delta = tsp - valp->ts; ameba_events.perf_submit(ctx, &data, sizeof(data)); start.delete(&pid); return 0; } ` type amebaEvent struct { Pid uint32 Comm [16]byte Delta uint64 } func main() { m := bpf.NewModule(source, []string{}) defer m.Close() amebaUprobe, err := m.LoadUprobe("do_entry") if err != nil { fmt.Fprintf(os.Stderr, "Failed to load do_entry: %s\n", err) os.Exit(1) } amebaUretprobe, err := m.LoadUprobe("do_return") if err != nil { fmt.Fprintf(os.Stderr, "Failed to load do_return: %s\n", err) os.Exit(1) } err = m.AttachUprobe("/tmp/sandbox/ameba", "ameba", amebaUprobe, -1) if err != nil { fmt.Fprintf(os.Stderr, "Failed to attach do_entry uprobe: %s\n", err) os.Exit(1) } err = m.AttachUretprobe("/tmp/sandbox/ameba", "ameba", amebaUretprobe, -1) if err != nil { fmt.Fprintf(os.Stderr, "Failed to attach do_return uretprobe: %s\n", err) os.Exit(1) } table := bpf.NewTable(m.TableId("ameba_events"), m) channel := make(chan []byte) perfMap, err := bpf.InitPerfMap(table, channel) 
if err != nil { fmt.Fprintf(os.Stderr, "Failed to init perf map: %s\n", err) os.Exit(1) } sig := make(chan os.Signal, 1) signal.Notify(sig, os.Interrupt, os.Kill) fmt.Printf("%10s\t%s\t%s\t%s\n", "PID", "COMMAND", "DURATION", "RAW") go func() { var event amebaEvent for { data := <-channel err := binary.Read(bytes.NewBuffer(data), binary.LittleEndian, &event) if err != nil { fmt.Printf("failed to decode received data: %s\n", err) continue } // Convert C string (null-terminated) to Go string comm := string(event.Comm[:bytes.IndexByte(event.Comm[:], 0)]) fmt.Printf("%10d\t%s\t%s\t%d\n", event.Pid, comm, time.Duration(event.Delta), event.Delta) } }() perfMap.Start() <-sig perfMap.Stop() }
程序输出示例:
       PID	COMMAND	DURATION	RAW
     15515	ameba	1193179h21m9.457496929s	4295445669457496929
     15550	ameba	1193198h27m37.653709676s	4295514457653709676
错误在哪里?
这似乎是由内核和用户端之间的填充不匹配引起的。该data_t结构实际上是在编译时填充的,等效于以下内容:
data_t
/*
 * Effective layout of data_t once the compiler has aligned its members.
 * pid (4 bytes) and comm (TASK_COMM_LEN bytes, alignment 1) are contiguous
 * and end at offset 20 when TASK_COMM_LEN is 16; the 8-byte delta must start
 * on an 8-byte boundary, so the 4 bytes of padding sit between comm and
 * delta, not after pid.
 */
struct data_t {
    u32 pid;                    /* offset 0 */
    char comm[TASK_COMM_LEN];   /* offset 4 */
    char padding[4];            /* offset 20: inserted by the compiler */
    u64 delta;                  /* offset 24 */
};
如果您在Go端明确添加相同的填充,您的问题将消失:
// amebaEvent mirrors the in-memory layout of the C struct
//
//	struct data_t { u32 pid; char comm[16]; u64 delta; };
//
// as a C compiler lays it out: pid at offset 0, comm at offset 4, four bytes
// of alignment padding at offset 20, and delta at offset 24. encoding/binary
// reads struct fields back to back without implicit padding, so the padding
// has to be spelled out explicitly.
//
// The padding must come after Comm. Placing it before Comm also yields a
// correct Delta (the total size is the same) but shifts Comm by four bytes,
// so a command name such as "ameba" would be decoded as "a".
type amebaEvent struct {
	Pid   uint32
	Comm  [16]byte
	Pad   [4]byte // alignment padding preceding the 8-byte Delta
	Delta uint64
}
产生:
       PID	COMMAND	DURATION	RAW
      8258	a	1.000179625s	1000179625
      8260	a	1.000158337s	1000158337
正如您在评论中提到的,另一种解决方案是用 __attribute__((packed)) 将 C 结构体声明为紧凑(packed)布局,从而去掉填充。
__attribute__((packed))
这与 bcc 上的这个 issue 密切相关(尽管不完全相同):https://github.com/iovisor/bcc/issues/2017。