Provided by: libdisasm-dev_0.23-6build1_amd64
NAME
x86_disasm, x86_disasm_forward, x86_disasm_range - disassemble a bytestream to x86 assembly language instructions
SYNOPSIS
#include <libdis.h> typedef void (*DISASM_CALLBACK)( x86_insn_t *, void * ); typedef long (*DISASM_RESOLVER)( x86_op_t *, x86_insn_t *, void * ); int x86_disasm( unsigned char *buf, unsigned int buf_len, unsigned long buf_rva, unsigned int offset, x86_insn_t * insn ); int x86_disasm_range( unsigned char *buf, unsigned long buf_rva, unsigned int offset, unsigned int len, DISASM_CALLBACK func, void *arg ); int x86_disasm_forward( unsigned char *buf, unsigned int buf_len, unsigned long buf_rva, unsigned int offset, DISASM_CALLBACK func, void *arg, DISASM_RESOLVER resolver, void *r_arg );
DESCRIPTION
#define MAX_REGNAME 8 #define MAX_PREFIX_STR 32 #define MAX_MNEM_STR 16 #define MAX_INSN_SIZE 20 /* same as in i386.h */ #define MAX_OP_STRING 32 /* max possible operand size in string form */ #define MAX_OP_RAW_STRING 64 /* max possible operand size in raw form */ #define MAX_OP_XML_STRING 256 /* max possible operand size in xml form */ #define MAX_NUM_OPERANDS 8 /* max # implicit and explicit operands */ #define MAX_INSN_STRING 512 /* 2 * 8 * MAX_OP_STRING */ #define MAX_INSN_RAW_STRING 1024 /* 2 * 8 * MAX_OP_RAW_STRING */ #define MAX_INSN_XML_STRING 4096 /* 2 * 8 * MAX_OP_XML_STRING */ enum x86_reg_type { /* NOTE: these may be ORed together */ reg_gen = 0x00001, /* general purpose */ reg_in = 0x00002, /* incoming args, ala RISC */ reg_out = 0x00004, /* args to calls, ala RISC */ reg_local = 0x00008, /* local vars, ala RISC */ reg_fpu = 0x00010, /* FPU data register */ reg_seg = 0x00020, /* segment register */ reg_simd = 0x00040, /* SIMD/MMX reg */ reg_sys = 0x00080, /* restricted/system register */ reg_sp = 0x00100, /* stack pointer */ reg_fp = 0x00200, /* frame pointer */ reg_pc = 0x00400, /* program counter */ reg_retaddr = 0x00800, /* return addr for func */ reg_cond = 0x01000, /* condition code / flags */ reg_zero = 0x02000, /* zero register, ala RISC */ reg_ret = 0x04000, /* return value */ reg_src = 0x10000, /* array/rep source */ reg_dest = 0x20000, /* array/rep destination */ reg_count = 0x40000 /* array/rep/loop counter */ }; typedef struct { char name[MAX_REGNAME]; enum x86_reg_type type; /* what register is used for */ unsigned int size; /* size of register in bytes */ unsigned int id; /* register ID #, for quick compares */ unsigned int alias; /* ID of reg this is an alias of */ unsigned int shift; /* amount to shift aliased reg by */ } x86_reg_t; typedef struct { unsigned int scale; /* scale factor */ x86_reg_t index, base; /* index, base registers */ long disp; /* displacement */ char disp_sign; /* is negative? 
1/0 */ char disp_size; /* 0, 1, 2, 4 */ } x86_ea_t; enum x86_op_type { /* mutually exclusive */ op_unused = 0, /* empty/unused operand */ op_register = 1, /* CPU register */ op_immediate = 2, /* Immediate Value */ op_relative_near = 3, /* Relative offset from IP */ op_relative_far = 4, op_absolute = 5, /* Absolute address (ptr16:32) */ op_expression = 6, /* Address expression (scale/index/base/disp) */ op_offset = 7, /* Offset from start of segment (m32) */ op_unknown }; enum x86_op_datatype { /* these use Intel's lame terminology */ op_byte = 1, /* 1 byte integer */ op_word = 2, /* 2 byte integer */ op_dword = 3, /* 4 byte integer */ op_qword = 4, /* 8 byte integer */ op_dqword = 5, /* 16 byte integer */ op_sreal = 6, /* 4 byte real (single real) */ op_dreal = 7, /* 8 byte real (double real) */ op_extreal = 8, /* 10 byte real (extended real) */ op_bcd = 9, /* 10 byte binary-coded decimal */ op_simd = 10, /* 16 byte packed (SIMD, MMX) */ op_ssimd = 10, /* 16 byte : 4 packed single FP (SIMD, MMX) */ op_dsimd = 11, /* 16 byte : 2 packed double FP (SIMD, MMX) */ op_sssimd = 12, /* 4 byte : scalar single FP (SIMD, MMX) */ op_sdsimd = 13, /* 8 byte : scalar double FP (SIMD, MMX) */ op_descr32 = 14, /* 6 byte Intel descriptor 2:4 */ op_descr16 = 15, /* 4 byte Intel descriptor 2:2 */ op_pdescr32 = 16, /* 6 byte Intel pseudo-descriptor 32:16 */ op_pdescr16 = 17, /* 6 byte Intel pseudo-descriptor 8:24:16 */ op_fpuenv = 11 /* 28 byte FPU control/environment data */ }; enum x86_op_access { /* ORed together */ op_read = 1, op_write = 2, op_execute = 4 }; enum x86_op_flags { /* ORed together, but segs are mutually exclusive */ op_signed = 1, /* signed integer */ op_string = 2, /* possible string or array */ op_constant = 4, /* symbolic constant */ op_pointer = 8, /* operand points to a memory address */ op_sysref = 0x010, /* operand is a syscall number */ op_implied = 0x020, /* operand is implicit in insn */ op_hardcode = 0x040, /* operand is hardcoded in insn */ op_es_seg = 
0x100, /* ES segment override */ op_cs_seg = 0x200, /* CS segment override */ op_ss_seg = 0x300, /* SS segment override */ op_ds_seg = 0x400, /* DS segment override */ op_fs_seg = 0x500, /* FS segment override */ op_gs_seg = 0x600 /* GS segment override */ }; typedef struct { enum x86_op_type type; /* operand type */ enum x86_op_datatype datatype; /* operand size */ enum x86_op_access access; /* operand access [RWX] */ enum x86_op_flags flags; /* misc flags */ union { /* immediate values */ char sbyte; short sword; long sdword; qword sqword; unsigned char byte; unsigned short word; unsigned long dword; qword qword; float sreal; double dreal; /* misc large/non-native types */ unsigned char extreal[10]; unsigned char bcd[10]; qword dqword[2]; unsigned char simd[16]; unsigned char fpuenv[28]; /* absolute address */ void * address; /* offset from segment */ unsigned long offset; /* ID of CPU register */ x86_reg_t reg; /* offsets from current insn */ char relative_near; long relative_far; /* effective address [expression] */ x86_ea_t expression; } data; void * insn; } x86_op_t; typedef struct x86_operand_list { x86_op_t op; struct x86_operand_list *next; } x86_oplist_t; enum x86_insn_group { insn_none = 0, insn_controlflow = 1, insn_arithmetic = 2, insn_logic = 3, insn_stack = 4, insn_comparison = 5, insn_move = 6, insn_string = 7, insn_bit_manip = 8, insn_flag_manip = 9, insn_fpu = 10, insn_interrupt = 13, insn_system = 14, insn_other = 15 }; enum x86_insn_type { insn_invalid = 0, /* insn_controlflow */ insn_jmp = 0x1001, insn_jcc = 0x1002, insn_call = 0x1003, insn_callcc = 0x1004, insn_return = 0x1005, insn_loop = 0x1006, /* insn_arithmetic */ insn_add = 0x2001, insn_sub = 0x2002, insn_mul = 0x2003, insn_div = 0x2004, insn_inc = 0x2005, insn_dec = 0x2006, insn_shl = 0x2007, insn_shr = 0x2008, insn_rol = 0x2009, insn_ror = 0x200A, /* insn_logic */ insn_and = 0x3001, insn_or = 0x3002, insn_xor = 0x3003, insn_not = 0x3004, insn_neg = 0x3005, /* insn_stack */ insn_push = 
0x4001, insn_pop = 0x4002, insn_pushregs = 0x4003, insn_popregs = 0x4004, insn_pushflags = 0x4005, insn_popflags = 0x4006, insn_enter = 0x4007, insn_leave = 0x4008, /* insn_comparison */ insn_test = 0x5001, insn_cmp = 0x5002, /* insn_move */ insn_mov = 0x6001, /* move */ insn_movcc = 0x6002, /* conditional move */ insn_xchg = 0x6003, /* exchange */ insn_xchgcc = 0x6004, /* conditional exchange */ /* insn_string */ insn_strcmp = 0x7001, insn_strload = 0x7002, insn_strmov = 0x7003, insn_strstore = 0x7004, insn_translate = 0x7005, /* xlat */ /* insn_bit_manip */ insn_bittest = 0x8001, insn_bitset = 0x8002, insn_bitclear = 0x8003, /* insn_flag_manip */ insn_clear_carry = 0x9001, insn_clear_zero = 0x9002, insn_clear_oflow = 0x9003, insn_clear_dir = 0x9004, insn_clear_sign = 0x9005, insn_clear_parity = 0x9006, insn_set_carry = 0x9007, insn_set_zero = 0x9008, insn_set_oflow = 0x9009, insn_set_dir = 0x900A, insn_set_sign = 0x900B, insn_set_parity = 0x900C, insn_tog_carry = 0x9010, insn_tog_zero = 0x9020, insn_tog_oflow = 0x9030, insn_tog_dir = 0x9040, insn_tog_sign = 0x9050, insn_tog_parity = 0x9060, /* insn_fpu */ insn_fmov = 0xA001, insn_fmovcc = 0xA002, insn_fneg = 0xA003, insn_fabs = 0xA004, insn_fadd = 0xA005, insn_fsub = 0xA006, insn_fmul = 0xA007, insn_fdiv = 0xA008, insn_fsqrt = 0xA009, insn_fcmp = 0xA00A, insn_fcos = 0xA00C, insn_fldpi = 0xA00D, insn_fldz = 0xA00E, insn_ftan = 0xA00F, insn_fsine = 0xA010, insn_fsys = 0xA020, /* insn_interrupt */ insn_int = 0xD001, insn_intcc = 0xD002, /* not present in x86 ISA */ insn_iret = 0xD003, insn_bound = 0xD004, insn_debug = 0xD005, insn_trace = 0xD006, insn_invalid_op = 0xD007, insn_oflow = 0xD008, /* insn_system */ insn_halt = 0xE001, insn_in = 0xE002, /* input from port/bus */ insn_out = 0xE003, /* output to port/bus */ insn_cpuid = 0xE004, /* insn_other */ insn_nop = 0xF001, insn_bcdconv = 0xF002, /* convert to or from BCD */ insn_szconv = 0xF003 /* change size of operand */ }; enum x86_insn_note { insn_note_ring0 = 1, 
/* Only available in ring 0 */ insn_note_smm = 2, /* "" in System Management Mode */ insn_note_serial = 4 /* Serializing instruction */ }; enum x86_flag_status { insn_carry_set = 0x1, insn_zero_set = 0x2, insn_oflow_set = 0x4, insn_dir_set = 0x8, insn_sign_set = 0x10, insn_parity_set = 0x20, insn_carry_or_zero_set = 0x40, insn_zero_set_or_sign_ne_oflow = 0x80, insn_carry_clear = 0x100, insn_zero_clear = 0x200, insn_oflow_clear = 0x400, insn_dir_clear = 0x800, insn_sign_clear = 0x1000, insn_parity_clear = 0x2000, insn_sign_eq_oflow = 0x4000, insn_sign_ne_oflow = 0x8000 }; enum x86_insn_cpu { cpu_8086 = 1, /* Intel */ cpu_80286 = 2, cpu_80386 = 3, cpu_80387 = 4, cpu_80486 = 5, cpu_pentium = 6, cpu_pentiumpro = 7, cpu_pentium2 = 8, cpu_pentium3 = 9, cpu_pentium4 = 10, cpu_k6 = 16, /* AMD */ cpu_k7 = 32, cpu_athlon = 48 }; enum x86_insn_isa { isa_gp = 1, /* general purpose */ isa_fp = 2, /* floating point */ isa_fpumgt = 3, /* FPU/SIMD management */ isa_mmx = 4, /* Intel MMX */ isa_sse1 = 5, /* Intel SSE SIMD */ isa_sse2 = 6, /* Intel SSE2 SIMD */ isa_sse3 = 7, /* Intel SSE3 SIMD */ isa_3dnow = 8, /* AMD 3DNow! SIMD */ isa_sys = 9 /* system instructions */ }; enum x86_insn_prefix { insn_no_prefix = 0, insn_rep_zero = 1, insn_rep_notzero = 2, insn_lock = 4, insn_delay = 8 }; typedef struct { /* information about the instruction */ unsigned long addr; /* load address */ unsigned long offset; /* offset into file/buffer */ enum x86_insn_group group; /* meta-type, e.g. INSN_EXEC */ enum x86_insn_type type; /* type, e.g. 
INSN_BRANCH */ unsigned char bytes[MAX_INSN_SIZE]; unsigned char size; /* size of insn in bytes */ enum x86_insn_prefix prefix; enum x86_flag_status flags_set; /* flags set or tested by insn */ enum x86_flag_status flags_tested; /* the instruction proper */ char prefix_string[32]; /* prefixes [might be truncated] */ char mnemonic[8]; x86_op_t operands[3]; /* convenience fields for user */ void *block; /* code block containing this insn */ void *function; /* function containing this insn */ void *tag; /* tag the insn as seen/processed */ } x86_insn_t; #define X86_WILDCARD_BYTE 0xF4 typedef struct { enum x86_op_type type; /* operand type */ enum x86_op_datatype datatype; /* operand size */ enum x86_op_access access; /* operand access [RWX] */ enum x86_op_flags flags; /* misc flags */ } x86_invariant_op_t; typedef struct { unsigned char bytes[64]; /* invariant representation */ unsigned int size; /* number of bytes in insn */ enum x86_insn_group group; /* meta-type, e.g. INSN_EXEC */ enum x86_insn_type type; /* type, e.g. INSN_BRANCH */ x86_invariant_op_t operands[3]; /* use same ordering as x86_insn_t */ } x86_invariant_t;
EXAMPLES
The following sample callback outputs instructions in raw syntax: void raw_print( x86_insn_t *insn, void *arg ) { char line[1024]; x86_format_insn(insn, line, 1024, raw_syntax); printf( "%s\n", line); } The following sample resolver performs very limited checking on the operand of a jump or call to determine what program address the operand refers to: long resolver( x86_op_t *op, x86_insn_t *insn, void *arg ) { long retval = -1; /* this is a flat ripoff of internal_resolver in libdis.c -- we don't do any register or stack resolving, or check to see if we have already encountered this RVA */ if ( op->type == op_absolute || op->type == op_offset ) { retval = op->data.sdword; } else if ( op->type == op_relative_near || op->type == op_relative_far ) { if ( op->datatype == op_byte ) { retval = insn->addr + insn->size + op->data.sbyte; } else if ( op->datatype == op_word ) { retval = insn->addr + insn->size + op->data.sword; } else if ( op->datatype == op_dword ) { retval = insn->addr + insn->size + op->data.sdword; } } return( retval ); } The following code snippets demonstrate how to use the various disassembly routines: unsigned char *buf; /* buffer of bytes to disassemble */ unsigned int buf_len;/* length of buffer */ unsigned long buf_rva; /* load address of start of buffer */ unsigned int pos; /* position in buffer */ x86_insn_t insn; /* disassembled instruction */ /* disassemble entire buffer, printing automatically */ x86_disasm_range( buf, buf_rva, pos, buf_len, raw_print, NULL ); /* disassemble a single instruction, then print it */ if (x86_disasm( buf, buf_len, buf_rva, pos, &insn ) ) { raw_print( &insn, NULL ); } /* disassemble forward in 'buf' starting at 'pos' */ x86_disasm_forward( buf, buf_len, buf_rva, pos, raw_print, NULL, resolver, NULL );
SEE ALSO
libdisasm(7), x86_format_insn(3), x86_init(3), x86dis(1)