/* udis86 - libudis86/decode.c
 *
 * Copyright (c) 2002-2009 Vivek Thampi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright notice,
 *       this list of conditions and the following disclaimer in the documentation
 *       and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "udint.h"
#include "types.h"
#include "decode.h"

#ifndef __UD_STANDALONE__
# include <string.h>
#endif /* __UD_STANDALONE__ */
/* The max number of prefixes to an instruction */
#define MAX_PREFIXES    15

/* rex prefix bits */
#define REX_W(r)        ( ( 0xF & ( r ) ) >> 3 )
#define REX_R(r)        ( ( 0x7 & ( r ) ) >> 2 )
#define REX_X(r)        ( ( 0x3 & ( r ) ) >> 1 )
#define REX_B(r)        ( ( 0x1 & ( r ) ) >> 0 )
#define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \
                          ( P_REXR(n) << 2 ) | \
                          ( P_REXX(n) << 1 ) | \
                          ( P_REXB(n) << 0 ) )

/* scale-index-base bits */
#define SIB_S(b)        ( ( b ) >> 6 )
#define SIB_I(b)        ( ( ( b ) >> 3 ) & 7 )
#define SIB_B(b)        ( ( b ) & 7 )

/* modrm bits */
#define MODRM_REG(b)    ( ( ( b ) >> 3 ) & 7 )
#define MODRM_NNN(b)    ( ( ( b ) >> 3 ) & 7 )
#define MODRM_MOD(b)    ( ( ( b ) >> 6 ) & 3 )
#define MODRM_RM(b)     ( ( b ) & 7 )
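
/*
 * Worked example (illustrative comment only, not used by the decoder):
 * a rex prefix byte 0x4C is 0100.1100b, so with the macros above
 * REX_W(0x4C) = 1, REX_R(0x4C) = 1, REX_X(0x4C) = 0 and REX_B(0x4C) = 0.
 * Likewise a modrm byte 0x94 is 10.010.100b, giving MODRM_MOD(0x94) = 2,
 * MODRM_REG(0x94) = 2 and MODRM_RM(0x94) = 4 (an SIB byte follows).
 */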

static int decode_ext(struct ud *u, uint16_t ptr);


enum reg_class { /* register classes */
  REGCLASS_GPR,
  REGCLASS_MMX,
  REGCLASS_CR,
  REGCLASS_DB,
  REGCLASS_SEG,
  REGCLASS_XMM
};


/*
 * inp_start
 *    Should be called before each decode operation.
 */
static void
inp_start(struct ud *u)
{
  u->inp_ctr = 0;
}


static uint8_t
inp_next(struct ud *u)
{
  if (u->inp_end == 0) {
    if (u->inp_buf != NULL) {
      if (u->inp_buf_index < u->inp_buf_size) {
        u->inp_ctr++;
        return (u->inp_curr = u->inp_buf[u->inp_buf_index++]);
      }
    } else {
      int c;
      if ((c = u->inp_hook(u)) != UD_EOI) {
        u->inp_curr = c;
        u->inp_sess[u->inp_ctr++] = u->inp_curr;
        return u->inp_curr;
      }
    }
  }
  u->inp_end = 1;
  UDERR(u, "byte expected, eoi received\n");
  return 0;
}


static uint8_t
inp_curr(struct ud *u)
{
  return u->inp_curr;
}

/*
 * inp_uint8
 * inp_uint16
 * inp_uint32
 * inp_uint64
 *    Load little-endian values from input
 */
static uint8_t
inp_uint8(struct ud* u)
{
  return inp_next(u);
}

static uint16_t
inp_uint16(struct ud* u)
{
  uint16_t r, ret;

  ret = inp_next(u);
  r = inp_next(u);
  return ret | (r << 8);
}

static uint32_t
inp_uint32(struct ud* u)
{
  uint32_t r, ret;

  ret = inp_next(u);
  r = inp_next(u);
  ret = ret | (r << 8);
  r = inp_next(u);
  ret = ret | (r << 16);
  r = inp_next(u);
  return ret | (r << 24);
}

static uint64_t
inp_uint64(struct ud* u)
{
  uint64_t r, ret;

  ret = inp_next(u);
  r = inp_next(u);
  ret = ret | (r << 8);
  r = inp_next(u);
  ret = ret | (r << 16);
  r = inp_next(u);
  ret = ret | (r << 24);
  r = inp_next(u);
  ret = ret | (r << 32);
  r = inp_next(u);
  ret = ret | (r << 40);
  r = inp_next(u);
  ret = ret | (r << 48);
  r = inp_next(u);
  return ret | (r << 56);
}
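
/*
 * Illustrative note (comment only): the loaders above assemble bytes
 * least-significant first, so inp_uint32() over the byte stream
 * 78 56 34 12 yields 0x12345678.
 */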

static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  if (dis_mode == 64) {
    return rex_w ? 64 : (pfx_opr ? 16 : 32);
  } else if (dis_mode == 32) {
    return pfx_opr ? 16 : 32;
  } else {
    UD_ASSERT(dis_mode == 16);
    return pfx_opr ? 32 : 16;
  }
}


static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  if (dis_mode == 64) {
    return pfx_adr ? 32 : 64;
  } else if (dis_mode == 32) {
    return pfx_adr ? 16 : 32;
  } else {
    UD_ASSERT(dis_mode == 16);
    return pfx_adr ? 32 : 16;
  }
}
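
/*
 * Examples of the mappings above (illustration only): in 64-bit mode,
 * eff_opr_mode(64, 1, x) is 64 (rex.w wins), eff_opr_mode(64, 0, 1) is 16
 * (0x66 override) and eff_opr_mode(64, 0, 0) is 32; eff_adr_mode(64, 1)
 * is 32, while eff_adr_mode(32, 1) is 16.
 */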

/*
 * decode_prefixes
 *
 *    Extracts instruction prefixes.
 */
static int
decode_prefixes(struct ud *u)
{
  int done = 0;
  uint8_t curr = 0, last = 0;
  UD_RETURN_ON_ERROR(u);

  do {
    last = curr;
    curr = inp_next(u);
    UD_RETURN_ON_ERROR(u);
    if (u->inp_ctr == MAX_INSN_LENGTH) {
      UD_RETURN_WITH_ERROR(u, "max instruction length");
    }

    switch (curr)
    {
    case 0x2E:
      u->pfx_seg = UD_R_CS;
      break;
    case 0x36:
      u->pfx_seg = UD_R_SS;
      break;
    case 0x3E:
      u->pfx_seg = UD_R_DS;
      break;
    case 0x26:
      u->pfx_seg = UD_R_ES;
      break;
    case 0x64:
      u->pfx_seg = UD_R_FS;
      break;
    case 0x65:
      u->pfx_seg = UD_R_GS;
      break;
    case 0x67: /* address-size override prefix */
      u->pfx_adr = 0x67;
      break;
    case 0xF0:
      u->pfx_lock = 0xF0;
      break;
    case 0x66:
      u->pfx_opr = 0x66;
      break;
    case 0xF2:
      u->pfx_str = 0xf2;
      break;
    case 0xF3:
      u->pfx_str = 0xf3;
      break;
    default:
      /* consume if rex */
      done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1;
      break;
    }
  } while (!done);
  /* a rex prefix in 64-bit mode must be the last prefix */
  if (u->dis_mode == 64 && (last & 0xF0) == 0x40) {
    u->pfx_rex = last;
  }
  return 0;
}
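
/*
 * Illustration (comment only, hypothetical input): decoding the 64-bit
 * byte stream 66 45 31 C0, the loop above records pfx_opr = 0x66, keeps
 * going past 0x45 as a rex candidate, and stops at 0x31 (the opcode,
 * left in inp_curr for decode_opcode). Since the last prefix byte 0x45
 * lies in the 0x40-0x4F range, pfx_rex becomes 0x45, and the instruction
 * decodes as "xor r8w, r8w".
 */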

static inline unsigned int modrm( struct ud * u )
{
  if ( !u->have_modrm ) {
    u->modrm = inp_next( u );
    u->have_modrm = 1;
  }
  return u->modrm;
}


static unsigned int
resolve_operand_size( const struct ud * u, unsigned int s )
{
  switch ( s )
  {
  case SZ_V:
    return ( u->opr_mode );
  case SZ_Z:
    return ( u->opr_mode == 16 ) ? 16 : 32;
  case SZ_Y:
    return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
  case SZ_RDQ:
    return ( u->dis_mode == 64 ) ? 64 : 32;
  default:
    return s;
  }
}

static int resolve_mnemonic( struct ud* u )
{
  /* resolve 3dnow weirdness. */
  if ( u->mnemonic == UD_I3dnow ) {
    u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic;
  }
  /* SWAPGS is only valid in 64-bit mode */
  if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
    UDERR(u, "swapgs invalid outside 64-bit mode\n");
    return -1;
  }

  if (u->mnemonic == UD_Ixchg) {
    if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX  &&
         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
        (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
      u->operand[0].type = UD_NONE;
      u->operand[1].type = UD_NONE;
      u->mnemonic = UD_Inop;
    }
  }

  if (u->mnemonic == UD_Inop && u->pfx_repe) {
    u->pfx_repe = 0;
    u->mnemonic = UD_Ipause;
  }
  return 0;
}

/* -----------------------------------------------------------------------------
 * decode_a() - Decodes operands of the type seg:offset
 * -----------------------------------------------------------------------------
 */
static void
decode_a(struct ud* u, struct ud_operand *op)
{
  if (u->opr_mode == 16) {
    /* seg16:off16 */
    op->type = UD_OP_PTR;
    op->size = 32;
    op->lval.ptr.off = inp_uint16(u);
    op->lval.ptr.seg = inp_uint16(u);
  } else {
    /* seg16:off32 */
    op->type = UD_OP_PTR;
    op->size = 48;
    op->lval.ptr.off = inp_uint32(u);
    op->lval.ptr.seg = inp_uint16(u);
  }
}

/* -----------------------------------------------------------------------------
 * decode_gpr() - Returns decoded General Purpose Register
 * -----------------------------------------------------------------------------
 */
static enum ud_type
decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
{
  switch (s) {
  case 64:
    return UD_R_RAX + rm;
  case 32:
    return UD_R_EAX + rm;
  case 16:
    return UD_R_AX  + rm;
  case  8:
    if (u->dis_mode == 64 && u->pfx_rex) {
      if (rm >= 4)
        return UD_R_SPL + (rm - 4);
      return UD_R_AL + rm;
    } else return UD_R_AL + rm;
  case 0:
    /* invalid size in case of a decode error */
    UD_ASSERT(u->error);
    return UD_NONE;
  default:
    UD_ASSERT(!"invalid operand size");
    return UD_NONE;
  }
}
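
/*
 * Note added for clarity (assuming the register enum lays out AL..BH,
 * then SPL..DIL, then R8B..R15B): with any rex prefix present in 64-bit
 * mode, 8-bit register numbers 4-7 select SPL/BPL/SIL/DIL rather than
 * AH/CH/DH/BH, and numbers 8-15 select R8B..R15B.
 */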

static void
decode_reg(struct ud *u,
           struct ud_operand *opr,
           int type,
           int num,
           int size)
{
  int reg;
  size = resolve_operand_size(u, size);
  switch (type) {
  case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
  case REGCLASS_MMX : reg = UD_R_MM0  + (num & 7); break;
  case REGCLASS_XMM : reg = UD_R_XMM0 + num; break;
  case REGCLASS_CR  : reg = UD_R_CR0  + num; break;
  case REGCLASS_DB  : reg = UD_R_DR0  + num; break;
  case REGCLASS_SEG : {
    /*
     * Only 6 segment registers, anything else is an error.
     */
    if ((num & 7) > 5) {
      UDERR(u, "invalid segment register value\n");
      return;
    } else {
      reg = UD_R_ES + (num & 7);
    }
    break;
  }
  default:
    UD_ASSERT(!"invalid register type");
    return;
  }
  opr->type = UD_OP_REG;
  opr->base = reg;
  opr->size = size;
}

/*
 * decode_imm
 *
 *    Decode Immediate values.
 */
static void
decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
{
  op->size = resolve_operand_size(u, size);
  op->type = UD_OP_IMM;

  switch (op->size) {
  case  8: op->lval.sbyte  = inp_uint8(u);  break;
  case 16: op->lval.uword  = inp_uint16(u); break;
  case 32: op->lval.udword = inp_uint32(u); break;
  case 64: op->lval.uqword = inp_uint64(u); break;
  default: return;
  }
}


/*
 * decode_mem_disp
 *
 *    Decode mem address displacement.
 */
static void
decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
{
  switch (size) {
  case 8:
    op->offset = 8;
    op->lval.ubyte  = inp_uint8(u);
    break;
  case 16:
    op->offset = 16;
    op->lval.uword  = inp_uint16(u);
    break;
  case 32:
    op->offset = 32;
    op->lval.udword = inp_uint32(u);
    break;
  case 64:
    op->offset = 64;
    op->lval.uqword = inp_uint64(u);
    break;
  default:
    return;
  }
}

/*
 * decode_modrm_reg
 *
 *    Decodes reg field of mod/rm byte
 *
 */
static inline void
decode_modrm_reg(struct ud         *u,
                 struct ud_operand *operand,
                 unsigned int       type,
                 unsigned int       size)
{
  uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
  decode_reg(u, operand, type, reg, size);
}

/*
 * decode_modrm_rm
 *
 *    Decodes rm field of mod/rm byte
 *
 */
static void
decode_modrm_rm(struct ud         *u,
                struct ud_operand *op,
                unsigned char      type,    /* register type */
                unsigned int       size)    /* operand size */
{
  size_t offset = 0;
  unsigned char mod, rm;

  /* get mod, r/m and reg fields */
  mod = MODRM_MOD(modrm(u));
  rm  = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));

  /*
   * If mod is 11b, then the modrm.rm specifies a register.
   *
   */
  if (mod == 3) {
    decode_reg(u, op, type, rm, size);
    return;
  }

  /*
   * !11b => Memory Address
   */
  op->type = UD_OP_MEM;
  op->size = resolve_operand_size(u, size);

  if (u->adr_mode == 64) {
    op->base = UD_R_RAX + rm;
    if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 32;
    } else if (mod == 0 && (rm & 7) == 5) {
      op->base = UD_R_RIP;
      offset = 32;
    } else {
      offset = 0;
    }
    /*
     * Scale-Index-Base (SIB)
     */
    if ((rm & 7) == 4) {
      inp_next(u);

      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
      op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));

      /* special conditions for base reference */
      if (op->index == UD_R_RSP) {
        op->index = UD_NONE;
        op->scale = UD_NONE;
      }

      if (op->base == UD_R_RBP || op->base == UD_R_R13) {
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    }
  } else if (u->adr_mode == 32) {
    op->base = UD_R_EAX + rm;
    if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 32;
    } else if (mod == 0 && rm == 5) {
      op->base = UD_NONE;
      offset = 32;
    } else {
      offset = 0;
    }

    /* Scale-Index-Base (SIB) */
    if ((rm & 7) == 4) {
      inp_next(u);

      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
      op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));

      if (op->index == UD_R_ESP) {
        op->index = UD_NONE;
        op->scale = UD_NONE;
      }

      /* special condition for base reference */
      if (op->base == UD_R_EBP) {
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    }
  } else {
    const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
                                     UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
    const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
                                     UD_NONE, UD_NONE, UD_NONE, UD_NONE };
    op->base  = bases[rm & 7];
    op->index = indices[rm & 7];
    if (mod == 0 && rm == 6) {
      offset = 16;
      op->base = UD_NONE;
    } else if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 16;
    }
  }

  if (offset) {
    decode_mem_disp(u, offset, op);
  }
}
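
/*
 * Worked example for decode_modrm_rm (illustration only, hypothetical
 * input): in 32-bit address mode the sequence modrm=0x44, sib=0x8D,
 * disp8=0x08 gives mod=1, rm=4 (SIB present), scale=4, index=ECX,
 * base=EBP and an 8-bit displacement, i.e. the operand [ebp+ecx*4+0x8]
 * as in "mov eax, [ebp+ecx*4+0x8]" (8B 44 8D 08).
 */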

/*
 * decode_moffset
 *    Decode offset-only memory operand
 */
static void
decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
{
  opr->type = UD_OP_MEM;
  opr->size = resolve_operand_size(u, size);
  decode_mem_disp(u, u->adr_mode, opr);
}

/* -----------------------------------------------------------------------------
 * decode_operand() - Decodes a single operand.
 * -----------------------------------------------------------------------------
 */
static int
decode_operand(struct ud           *u,
               struct ud_operand   *operand,
               enum ud_operand_code type,
               unsigned int         size)
{
  operand->_oprcode = type;

  switch (type) {
  case OP_A :
    decode_a(u, operand);
    break;
  case OP_MR:
    decode_modrm_rm(u, operand, REGCLASS_GPR,
                    MODRM_MOD(modrm(u)) == 3 ?
                      Mx_reg_size(size) : Mx_mem_size(size));
    break;
  case OP_F:
    u->br_far = 1;
    /* intended fall through */
  case OP_M:
    if (MODRM_MOD(modrm(u)) == 3) {
      UDERR(u, "expected modrm.mod != 3\n");
    }
    /* intended fall through */
  case OP_E:
    decode_modrm_rm(u, operand, REGCLASS_GPR, size);
    break;
  case OP_G:
    decode_modrm_reg(u, operand, REGCLASS_GPR, size);
    break;
  case OP_sI:
  case OP_I:
    decode_imm(u, size, operand);
    break;
  case OP_I1:
    operand->type = UD_OP_CONST;
    operand->lval.udword = 1;
    break;
  case OP_N:
    if (MODRM_MOD(modrm(u)) != 3) {
      UDERR(u, "expected modrm.mod == 3\n");
    }
    /* intended fall through */
  case OP_Q:
    decode_modrm_rm(u, operand, REGCLASS_MMX, size);
    break;
  case OP_P:
    decode_modrm_reg(u, operand, REGCLASS_MMX, size);
    break;
  case OP_U:
    if (MODRM_MOD(modrm(u)) != 3) {
      UDERR(u, "expected modrm.mod == 3\n");
    }
    /* intended fall through */
  case OP_W:
    decode_modrm_rm(u, operand, REGCLASS_XMM, size);
    break;
  case OP_V:
    decode_modrm_reg(u, operand, REGCLASS_XMM, size);
    break;
  case OP_MU:
    decode_modrm_rm(u, operand, REGCLASS_XMM,
                    MODRM_MOD(modrm(u)) == 3 ?
                      Mx_reg_size(size) : Mx_mem_size(size));
    break;
  case OP_S:
    decode_modrm_reg(u, operand, REGCLASS_SEG, size);
    break;
  case OP_O:
    decode_moffset(u, size, operand);
    break;
  case OP_R0:
  case OP_R1:
  case OP_R2:
  case OP_R3:
  case OP_R4:
  case OP_R5:
  case OP_R6:
  case OP_R7:
    decode_reg(u, operand, REGCLASS_GPR,
               (REX_B(u->pfx_rex) << 3) | (type - OP_R0), size);
    break;
  case OP_AL:
  case OP_AX:
  case OP_eAX:
  case OP_rAX:
    decode_reg(u, operand, REGCLASS_GPR, 0, size);
    break;
  case OP_CL:
  case OP_CX:
  case OP_eCX:
    decode_reg(u, operand, REGCLASS_GPR, 1, size);
    break;
  case OP_DL:
  case OP_DX:
  case OP_eDX:
    decode_reg(u, operand, REGCLASS_GPR, 2, size);
    break;
  case OP_ES:
  case OP_CS:
  case OP_DS:
  case OP_SS:
  case OP_FS:
  case OP_GS:
    /* in 64-bit mode, only fs and gs are allowed */
    if (u->dis_mode == 64) {
      if (type != OP_FS && type != OP_GS) {
        UDERR(u, "invalid segment register in 64-bit mode\n");
      }
    }
    operand->type = UD_OP_REG;
    operand->base = (type - OP_ES) + UD_R_ES;
    operand->size = 16;
    break;
  case OP_J :
    decode_imm(u, size, operand);
    operand->type = UD_OP_JIMM;
    break;
  case OP_R :
    if (MODRM_MOD(modrm(u)) != 3) {
      UDERR(u, "expected modrm.mod == 3\n");
    }
    decode_modrm_rm(u, operand, REGCLASS_GPR, size);
    break;
  case OP_C:
    decode_modrm_reg(u, operand, REGCLASS_CR, size);
    break;
  case OP_D:
    decode_modrm_reg(u, operand, REGCLASS_DB, size);
    break;
  case OP_I3 :
    operand->type = UD_OP_CONST;
    operand->lval.sbyte = 3;
    break;
  case OP_ST0:
  case OP_ST1:
  case OP_ST2:
  case OP_ST3:
  case OP_ST4:
  case OP_ST5:
  case OP_ST6:
  case OP_ST7:
    operand->type = UD_OP_REG;
    operand->base = (type - OP_ST0) + UD_R_ST0;
    operand->size = 80;
    break;
  default :
    break;
  }
  return 0;
}

/*
 * decode_operands
 *
 *    Disassemble up to 3 operands of the current instruction being
 *    disassembled. By the end of the function, the operand fields
 *    of the ud structure will have been filled.
 */
static int
decode_operands(struct ud* u)
{
  decode_operand(u, &u->operand[0],
                 u->itab_entry->operand1.type,
                 u->itab_entry->operand1.size);
  decode_operand(u, &u->operand[1],
                 u->itab_entry->operand2.type,
                 u->itab_entry->operand2.size);
  decode_operand(u, &u->operand[2],
                 u->itab_entry->operand3.type,
                 u->itab_entry->operand3.size);
  return 0;
}

/* -----------------------------------------------------------------------------
 * clear_insn() - clear instruction structure
 * -----------------------------------------------------------------------------
 */
static void
clear_insn(register struct ud* u)
{
  u->error      = 0;
  u->pfx_seg    = 0;
  u->pfx_opr    = 0;
  u->pfx_adr    = 0;
  u->pfx_lock   = 0;
  u->pfx_repne  = 0;
  u->pfx_rep    = 0;
  u->pfx_repe   = 0;
  u->pfx_rex    = 0;
  u->pfx_str    = 0;
  u->mnemonic   = UD_Inone;
  u->itab_entry = NULL;
  u->have_modrm = 0;
  u->br_far     = 0;

  memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
  memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
  memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
}


static inline int
resolve_pfx_str(struct ud* u)
{
  if (u->pfx_str == 0xf3) {
    if (P_STR(u->itab_entry->prefix)) {
      u->pfx_rep  = 0xf3;
    } else {
      u->pfx_repe = 0xf3;
    }
  } else if (u->pfx_str == 0xf2) {
    u->pfx_repne = 0xf3;
  }
  return 0;
}

static int
resolve_mode( struct ud* u )
{
  int default64;

  /* if in error state, bail out */
  if ( u->error ) return -1;

  /* propagate prefix effects */
  if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */

    /* check whether the instruction is valid in 64-bit mode */
    if ( P_INV64( u->itab_entry->prefix ) ) {
      UDERR(u, "instruction invalid in 64-bit mode\n");
      return -1;
    }

    /* effective rex prefix is the effective mask for the
     * instruction hard-coded in the opcode map.
     */
    u->pfx_rex = ( u->pfx_rex & 0x40 ) |
                 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );

    /* whether this instruction has a default operand size of
     * 64 bits, also hardcoded into the opcode map.
     */
    default64 = P_DEF64( u->itab_entry->prefix );
    /* calculate effective operand size */
    if ( REX_W( u->pfx_rex ) ) {
      u->opr_mode = 64;
    } else if ( u->pfx_opr ) {
      u->opr_mode = 16;
    } else {
      /* unless the default opr size of instruction is 64,
       * the effective operand size in the absence of rex.w
       * prefix is 32.
       */
      u->opr_mode = default64 ? 64 : 32;
    }

    /* calculate effective address size */
    u->adr_mode = (u->pfx_adr) ? 32 : 64;
  } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
    u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
  } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
    u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
  }

  return 0;
}

static inline int
decode_insn(struct ud *u, uint16_t ptr)
{
  UD_ASSERT((ptr & 0x8000) == 0);
  u->itab_entry = &ud_itab[ ptr ];
  u->mnemonic = u->itab_entry->mnemonic;
  return (resolve_pfx_str(u)  == 0 &&
          resolve_mode(u)     == 0 &&
          decode_operands(u)  == 0 &&
          resolve_mnemonic(u) == 0) ? 0 : -1;
}

/*
 * decode_3dnow()
 *
 *    Decoding 3dnow is a little tricky because of its strange opcode
 *    structure. The final opcode disambiguation depends on the last
 *    byte that comes after the operands have been decoded. Fortunately,
 *    all 3dnow instructions have the same set of operand types. So we
 *    go ahead and decode the instruction by picking an arbitrarily chosen
 *    valid entry in the table, decode the operands, and read the final
 *    byte to resolve the mnemonic.
 */
static inline int
decode_3dnow(struct ud* u)
{
  uint16_t ptr;
  UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
  UD_ASSERT(u->le->table[0xc] != 0);
  decode_insn(u, u->le->table[0xc]);
  inp_next(u);
  if (u->error) {
    return -1;
  }
  ptr = u->le->table[inp_curr(u)];
  UD_ASSERT((ptr & 0x8000) == 0);
  u->mnemonic = ud_itab[ptr].mnemonic;
  return 0;
}

static int
decode_ssepfx(struct ud *u)
{
  uint8_t idx;
  uint8_t pfx;

  /*
   * String prefixes (f2, f3) take precedence over operand
   * size prefix (66).
   */
  pfx = u->pfx_str;
  if (pfx == 0) {
    pfx = u->pfx_opr;
  }
  idx = ((pfx & 0xf) + 1) / 2;
  if (u->le->table[idx] == 0) {
    idx = 0;
  }
  if (idx && u->le->table[idx] != 0) {
    /*
     * "Consume" the prefix as a part of the opcode, so it is no
     * longer exported as an instruction prefix.
     */
    u->pfx_str = 0;
    if (pfx == 0x66) {
      /*
       * consume "66" only if it was used for decoding, leaving
       * it to be used as an operands size override for some
       * simd instructions.
       */
      u->pfx_opr = 0;
    }
  }
  return decode_ext(u, u->le->table[idx]);
}
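
/*
 * Note added for reference: the index expression ((pfx & 0xf) + 1) / 2 in
 * decode_ssepfx maps no prefix to 0, 0xF2 to 1, 0xF3 to 2 and 0x66 to 3,
 * i.e. the four columns of an SSE-prefix lookup table.
 */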

/*
 * decode_ext()
 *
 *    Decode opcode extensions (if any)
 */
static int
decode_ext(struct ud *u, uint16_t ptr)
{
  uint8_t idx = 0;
  if ((ptr & 0x8000) == 0) {
    return decode_insn(u, ptr);
  }
  u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
  if (u->le->type == UD_TAB__OPC_3DNOW) {
    return decode_3dnow(u);
  }

  switch (u->le->type) {
  case UD_TAB__OPC_MOD:
    /* !11 = 0, 11 = 1 */
    idx = (MODRM_MOD(modrm(u)) + 1) / 4;
    break;
    /* disassembly mode/operand size/address size based tables.
     * 16 = 0, 32 = 1, 64 = 2
     */
  case UD_TAB__OPC_MODE:
    idx = u->dis_mode != 64 ? 0 : 1;
    break;
  case UD_TAB__OPC_OSIZE:
    idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
    break;
  case UD_TAB__OPC_ASIZE:
    idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
    break;
  case UD_TAB__OPC_X87:
    idx = modrm(u) - 0xC0;
    break;
  case UD_TAB__OPC_VENDOR:
    if (u->vendor == UD_VENDOR_ANY) {
      /* choose a valid entry */
      idx = (u->le->table[idx] != 0) ? 0 : 1;
    } else if (u->vendor == UD_VENDOR_AMD) {
      idx = 0;
    } else {
      idx = 1;
    }
    break;
  case UD_TAB__OPC_RM:
    idx = MODRM_RM(modrm(u));
    break;
  case UD_TAB__OPC_REG:
    idx = MODRM_REG(modrm(u));
    break;
  case UD_TAB__OPC_SSE:
    return decode_ssepfx(u);
  default:
    UD_ASSERT(!"not reached");
    break;
  }

  return decode_ext(u, u->le->table[idx]);
}

static int
decode_opcode(struct ud *u)
{
  uint16_t ptr;
  UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
  UD_RETURN_ON_ERROR(u);
  u->primary_opcode = inp_curr(u);
  ptr = u->le->table[inp_curr(u)];
  if (ptr & 0x8000) {
    u->le = &ud_lookup_table_list[ptr & ~0x8000];
    if (u->le->type == UD_TAB__OPC_TABLE) {
      inp_next(u);
      return decode_opcode(u);
    }
  }
  return decode_ext(u, ptr);
}

/* =============================================================================
 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
 * =============================================================================
 */
unsigned int
ud_decode(struct ud *u)
{
  inp_start(u);
  clear_insn(u);
  u->le = &ud_lookup_table_list[0];
  u->error = decode_prefixes(u) == -1 ||
             decode_opcode(u)   == -1 ||
             u->error;

  /* Handle decode error. */
  if (u->error) {
    /* clear out the decode data. */
    clear_insn(u);
    /* mark the sequence of bytes as invalid. */
    u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
    u->mnemonic = u->itab_entry->mnemonic;
  }

  /* maybe this stray segment override byte
   * should be spewed out?
   */
  if ( !P_SEG( u->itab_entry->prefix ) &&
       u->operand[0].type != UD_OP_MEM &&
       u->operand[1].type != UD_OP_MEM )
    u->pfx_seg = 0;

  u->insn_offset = u->pc;  /* set offset of instruction */
  u->asm_buf_fill = 0;     /* set translation buffer index to 0 */
  u->pc += u->inp_ctr;     /* move program counter by bytes decoded */

  /* return number of bytes disassembled. */
  return u->inp_ctr;
}
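
/*
 * Usage sketch (comment only): ud_decode() is normally reached through the
 * public udis86 API rather than called directly. Assuming the usual
 * udis86.h entry points, a caller would look roughly like:
 *
 *   ud_t u;
 *   ud_init(&u);
 *   ud_set_input_buffer(&u, buf, buf_len);
 *   ud_set_mode(&u, 64);
 *   ud_set_syntax(&u, UD_SYN_INTEL);
 *   while (ud_disassemble(&u)) {       // ud_disassemble invokes ud_decode
 *     printf("%s\n", ud_insn_asm(&u));
 *   }
 */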

/*
vim: set ts=2 sw=2 expandtab
*/