我正在编写一个工具,在x86-32和x86-64 Linux上使用libbfd和libopcodes进行反汇编。问题是,尽管我能够让libopcodes完成反汇编,却无法获得任何指令信息。为了便于演示,我写了一个能够重现该问题的最小示例。该程序应从入口点开始反汇编,直到遇到第一条RET/RETQ指令为止。
libbfd
libopcodes
RET
RETQ
这段代码因为使用了全局变量而略显凌乱,并且为简洁起见省略了错误检查等内容,但应该足以清楚地说明问题。
#include <bfd.h>
#include <dis-asm.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <libiberty.h>

/*
 * Holds state for BFD and libopcodes.
 *
 * Both are globals because custom_fprintf() is invoked by libopcodes and
 * reads them directly instead of receiving them as arguments.
 */
bfd * abfd = NULL;
disassemble_info dinfo = {0};

/*
 * Temporary hack to signal when disassembling should stop.
 * Set by custom_fprintf(), read by disassemble_entry().
 */
static bool stop_disassembling = false;

/*
 * Gets path to currently running executable.
 *
 * target_path: destination buffer; always NUL-terminated on return when
 *              size > 0.
 * size:        capacity of target_path in bytes, including the terminator.
 *
 * Returns true on success, false if the buffer is unusable or the link
 * could not be read. readlink() truncates silently, so a result of exactly
 * size - 1 bytes may be an incomplete path.
 */
bool get_target_path(char * target_path, size_t size)
{
    ssize_t len;

    if(target_path == NULL || size == 0)
    {
        return false;
    }

    /* Reserve one byte for the terminator: readlink() does not add one. */
    len = readlink("/proc/self/exe", target_path, size - 1);
    if(len < 0)
    {
        target_path[0] = '\0';
        return false;
    }

    target_path[len] = '\0';
    return true;
}

/*
 * libopcodes appends spaces on the end of some instructions so for
 * comparisons, we want to strip those first.
 *
 * Truncates str at the first character that is not printable-and-visible
 * (space, control character or the terminating NUL), looking at no more
 * than size bytes.
 */
void strip_tail(char * str, unsigned int size)
{
    unsigned int i;

    for(i = 0; i < size; i++)
    {
        /* <ctype.h> functions require a value representable as unsigned char. */
        if(!isgraph((unsigned char)str[i]))
        {
            str[i] = '\0';
            break;
        }
    }
}

/*
 * Checks whether the current instruction will cause the control flow to not
 * proceed to the linearly subsequent instruction (e.g. ret, jmp, etc.)
 *
 * Only the return mnemonic is recognised: "retq" when the opened BFD reports
 * 64-bit addresses, "ret" otherwise. Reads the global abfd, which must be
 * open.
 */
bool breaks_control_flow(const char * str)
{
    const char * ret_mnemonic =
        (abfd->arch_info->bits_per_address == 64) ? "retq" : "ret";

    return strcmp(str, ret_mnemonic) == 0;
}

/*
 * Used as a callback for libopcodes so we can do something useful with the
 * disassembly. Currently this just outputs to stdout.
 *
 * Each formatted fragment is printed on its own line, followed by a line
 * describing dinfo.insn_type, or "insn_info not valid" when the disassembler
 * has not set dinfo.insn_info_valid. Sets stop_disassembling once a return
 * instruction is seen.
 *
 * stream is unused; output always goes to stdout.
 * Returns the value of vsnprintf() for the fragment.
 */
int custom_fprintf(void * stream, const char * format, ...)
{
    /* silly amount */
    char str[128] = {0};
    int rv;
    va_list args;

    (void)stream;

    va_start(args, format);
    rv = vsnprintf(str, ARRAY_SIZE(str) - 1, format, args);
    va_end(args);

    puts(str);
    strip_tail(str, ARRAY_SIZE(str));

    if(breaks_control_flow(str))
    {
        puts("Stopped disassembly");
        stop_disassembling = true;
    }

    if(dinfo.insn_info_valid)
    {
        switch(dinfo.insn_type)
        {
            case dis_noninsn:
                printf("not an instruction\n");
                break;
            case dis_nonbranch:
                printf("not a branch\n");
                break;
            case dis_branch:
                printf("is a branch\n");
                break;
            case dis_condbranch:
                printf("is a conditional branch\n");
                break;
            case dis_jsr:
                printf("jump to subroutine\n");
                break;
            case dis_condjsr:
                printf("conditional jump to subroutine\n");
                break;
            case dis_dref:
                printf("data reference in instruction\n");
                break;
            case dis_dref2:
                printf("two data references in instruction\n");
                break;
            default:
                printf("not enumerated\n");
                break;
        }
    }
    else
    {
        printf("insn_info not valid\n");
    }

    return rv;
}

/*
 * Initialises libopcodes disassembler and returns an instance of it.
 *
 * Fills dinfo from the flavour, architecture, machine and byte order of
 * abfd and routes all disassembler output through custom_fprintf().
 * The result is whatever disassembler() returns for abfd; callers should
 * check it before use.
 */
disassembler_ftype init_disasm(bfd * abfd, disassemble_info * dinfo)
{
    /* Override the stream the disassembler outputs to */
    init_disassemble_info(dinfo, NULL, custom_fprintf);

    dinfo->flavour = bfd_get_flavour(abfd);
    dinfo->arch = bfd_get_arch(abfd);
    dinfo->mach = bfd_get_mach(abfd);
    dinfo->endian = abfd->xvec->byteorder;

    disassemble_init_for_target(dinfo);

    return disassembler(abfd);
}

/*
 * Method of locating section from VMA taken from opdis.
 *
 * vma is the address being searched for; sec receives the section that
 * contains it (left unchanged when no section matches).
 */
typedef struct
{
    bfd_vma vma;
    asection * sec;
} BFD_VMA_SECTION;

/*
 * Loads section and fills in dinfo accordingly. Since this function allocates
 * memory in dinfo->buffer, callers need to call free once they are finished.
 *
 * Returns true when the section contents were read, false otherwise (in
 * which case nothing is left allocated and dinfo is not modified).
 */
bool load_section(bfd * abfd, disassemble_info * dinfo, asection * s)
{
    bfd_size_type size = bfd_section_size(s->owner, s);
    unsigned char * buf = xmalloc(size);

    (void)abfd; /* the section's owning BFD is used instead */

    if(!bfd_get_section_contents(s->owner, s, buf, 0, size))
    {
        free(buf);
        return false;
    }

    dinfo->section = s;
    dinfo->buffer = buf;
    dinfo->buffer_length = size;
    dinfo->buffer_vma = bfd_section_vma(s->owner, s);

    /*
     * Casts make the arguments match the conversion specifiers. The format
     * string itself is unchanged; note that its newline precedes the address,
     * so the next output continues on the same line.
     */
    printf("Allocated %d bytes for %s section\n: 0x%lX", (int)size, s->name,
           (unsigned long)dinfo->buffer_vma);

    return true;
}

/*
 * Used to locate section for a vma.
 *
 * Callback for bfd_map_over_sections(); data points to a BFD_VMA_SECTION.
 * Records s in the request when the requested address lies inside
 * [s->vma, s->vma + section size).
 */
void vma_in_section(bfd * abfd, asection * s, void * data)
{
    BFD_VMA_SECTION * req = data;

    if(req == NULL)
    {
        return;
    }

    if(req->vma >= s->vma && req->vma < (s->vma + bfd_section_size(abfd, s)))
    {
        req->sec = s;
    }
}

/*
 * Locate and load section containing vma.
 *
 * Returns false when no section contains vma or the section could not be
 * loaded; on success dinfo->buffer is owned by the caller.
 */
bool load_section_for_vma(bfd * abfd, disassemble_info * dinfo, bfd_vma vma)
{
    BFD_VMA_SECTION req = {vma, NULL};

    bfd_map_over_sections(abfd, vma_in_section, &req);

    if(req.sec == NULL)
    {
        return false;
    }

    return load_section(abfd, dinfo, req.sec);
}

/*
 * Start disassembling from entry point.
 *
 * Disassembles one instruction at a time until custom_fprintf() requests a
 * stop, the disassembler reports an error (non-positive length), or the end
 * of the loaded section is reached. The section buffer is released before
 * returning.
 *
 * Returns false only when the section holding the entry point could not be
 * loaded.
 */
bool disassemble_entry(bfd * abfd, disassemble_info * dinfo, disassembler_ftype disassembler)
{
    bfd_vma vma = bfd_get_start_address(abfd);
    bfd_vma end;

    /* First locate and load the section containing the vma */
    if(!load_section_for_vma(abfd, dinfo, vma))
    {
        return false;
    }

    end = dinfo->buffer_vma + dinfo->buffer_length;

    /* Keep disassembling until signalled otherwise or error */
    while(vma < end)
    {
        int size;

        dinfo->insn_info_valid = 0;
        size = disassembler(vma, dinfo);
        printf("Disassembled %d bytes at 0x%lX\n", size, (unsigned long)vma);

        /* Any non-positive length would stall or move backwards. */
        if(size <= 0 || stop_disassembling)
        {
            break;
        }

        vma += size;
    }

    free(dinfo->buffer);
    dinfo->buffer = NULL;
    dinfo->buffer_length = 0;

    return true;
}

/*
 * Opens the running executable with BFD and disassembles it from its entry
 * point. Exits with EXIT_FAILURE when any step fails.
 */
int main(void)
{
    char target_path[PATH_MAX] = {0};
    int status = EXIT_FAILURE;

    bfd_init();

    /* Get path for the running instance of this program */
    if(!get_target_path(target_path, ARRAY_SIZE(target_path)))
    {
        fprintf(stderr, "could not resolve path of running executable\n");
        return EXIT_FAILURE;
    }

    abfd = bfd_openr(target_path, NULL);
    if(abfd == NULL)
    {
        fprintf(stderr, "could not open %s\n", target_path);
        return EXIT_FAILURE;
    }

    if(bfd_check_format(abfd, bfd_object))
    {
        disassembler_ftype disasm = init_disasm(abfd, &dinfo);

        if(disasm != NULL && disassemble_entry(abfd, &dinfo, disasm))
        {
            status = EXIT_SUCCESS;
        }
    }

    /* Close on every path once the BFD has been opened. */
    bfd_close(abfd);
    abfd = NULL;

    return status;
}
可以使用下面的makefile构建此源文件。要成功完成链接,需要在本地计算机上安装binutils-dev软件包:
makefile
binutils-dev
# Builds the disasm example; requires the libbfd/libopcodes development files.
# all and clean do not name files they produce, so mark them phony.
.PHONY: all clean

# Compile and link the disassembler example into ./disasm.
all:
	gcc -Wall disasm.c -o disasm -lbfd -lopcodes

# Remove the built binary.
clean:
	rm -f disasm
运行时,输出为:
Allocated 2216 bytes for .text section : 0x400BF0xor insn_info not valid %ebp insn_info not valid , insn_info not valid %ebp insn_info not valid Disassembled 2 bytes at 0x400BF0 mov insn_info not valid %rdx insn_info not valid , insn_info not valid %r9 insn_info not valid Disassembled 3 bytes at 0x400BF2 pop insn_info not valid %rsi insn_info not valid Disassembled 1 bytes at 0x400BF5 mov insn_info not valid %rsp insn_info not valid , insn_info not valid %rdx insn_info not valid Disassembled 3 bytes at 0x400BF6 and insn_info not valid $0xfffffffffffffff0 insn_info not valid , insn_info not valid %rsp insn_info not valid Disassembled 4 bytes at 0x400BF9 push insn_info not valid %rax insn_info not valid Disassembled 1 bytes at 0x400BFD push insn_info not valid %rsp insn_info not valid Disassembled 1 bytes at 0x400BFE mov insn_info not valid $0x401450 insn_info not valid , insn_info not valid %r8 insn_info not valid Disassembled 7 bytes at 0x400BFF mov insn_info not valid $0x4013c0 insn_info not valid , insn_info not valid %rcx insn_info not valid Disassembled 7 bytes at 0x400C06 mov insn_info not valid $0x4012ce insn_info not valid , insn_info not valid %rdi insn_info not valid Disassembled 7 bytes at 0x400C0D callq insn_info not valid 0x0000000000400ad8 insn_info not valid Disassembled 5 bytes at 0x400C14 hlt insn_info not valid Disassembled 1 bytes at 0x400C19 nop insn_info not valid Disassembled 1 bytes at 0x400C1A nop insn_info not valid Disassembled 1 bytes at 0x400C1B sub insn_info not valid $0x8 insn_info not valid , insn_info not valid %rsp insn_info not valid Disassembled 4 bytes at 0x400C1C mov insn_info not valid 0x2013b9(%rip) insn_info not valid , insn_info not valid %rax insn_info not valid # insn_info not valid 0x0000000000601fe0 insn_info not valid Disassembled 7 bytes at 0x400C20 test insn_info not valid %rax insn_info not valid , insn_info not valid %rax insn_info not valid Disassembled 3 bytes at 0x400C27 je insn_info not valid 0x0000000000400c2e 
insn_info not valid Disassembled 2 bytes at 0x400C2A callq insn_info not valid *%rax insn_info not valid Disassembled 2 bytes at 0x400C2C add insn_info not valid $0x8 insn_info not valid , insn_info not valid %rsp insn_info not valid Disassembled 4 bytes at 0x400C2E retq Stopped disassembly insn_info not valid Disassembled 1 bytes at 0x400C32
我希望能够通过dinfo->insn_type、target等字段读取每条指令的指令信息,并且这一行为在x86-32和x86-64上都应当可用。如果至少可以确认这两种架构都没有实现该功能,那么我可以着手自己填写这些信息。
dinfo->insn_type
target
不幸的是,截至binutils libopcodes 2.22,insn_type在i386和x86_64上都没有被填写。受到较广泛支持的体系结构只有MIPS、Sparc和Cell的SPU。截至当前的CVS HEAD,情况仍然如此。
insn_type
很难证明某样东西不存在,不过举例来说,在Sparc反汇编器的源码中,可以看到多处设置insn_type的代码,例如info->insn_type = dis_branch;而在i386反汇编器的源码中,既没有出现insn_type,也没有出现它预期会取的任何值(dis_branch、dis_nonbranch等)。
info->insn_type = dis_branch
dis_branch
dis_nonbranch
检查libopcodes中所有支持insn_type的文件,可以得到:
opcodes/mips-dis.c
opcodes/spu-dis.c
opcodes/microblaze-dis.c
opcodes/cris-dis.c
opcodes/sparc-dis.c
opcodes/mmix-dis.c