vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32
Log | Files | Refs

emu.c (49656B)


      1 /*
      2  * Simple instruction scanning and rewriting
      3  * for implementing vx32 on x86-32 hosts.
      4  */
      5 
      6 #ifdef __APPLE__
      7 #define __DARWIN_UNIX03 0
      8 #endif
      9 
     10 #include <stdio.h>
     11 #include <stdlib.h>
     12 #include <stddef.h>
     13 #include <string.h>
     14 #include <setjmp.h>
     15 #include <assert.h>
     16 #include <errno.h>
     17 #include <sys/stat.h>		// XX FreeBSD 4.9 header bug?
     18 #include <sys/mman.h>
     19 #include <stdarg.h>
     20 #include <unistd.h>
     21 
     22 #include "vx32.h"
     23 #include "vx32impl.h"
     24 #include "os.h"
     25 #include "x86dis.h"
     26 
     27 // Special values for unused entries in entrypoint hash table
     28 #define NULLSRCEIP		((uint32_t)-1)
     29 #define NULLDSTEIP		((uint32_t)(uintptr_t)vxrun_nullfrag);
     30 
     31 int vx32_debugxlate = 0;
     32 
     33 static uint64_t nflush;
     34 
     35 static void disassemble(uint8_t *addr0, uint8_t*, uint8_t*);
     36 
     37 // Create the emulation state for a new process
     38 int vxemu_init(struct vxproc *vxp)
     39 {
     40 	// Initial emulation hash table size (must be a power of two)
     41 	int etablen = 4096;
     42 
     43 	// Allocate the vxemu state area in 32-bit memory,
     44 	// because it must be accessible to our translated code
     45 	// via the special fs segment register setup.
     46 	vxemu *e = mmap(NULL, VXCODEBUFSIZE,
     47 			PROT_READ | PROT_WRITE | PROT_EXEC,
     48 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
     49 	if (e == MAP_FAILED){
     50 		vxprint("vxemu_init: mmap failed\n");
     51 		return -1;
     52 	}
     53 
     54 	// Basic initialization
     55 	memset(e, 0, sizeof(vxemu));
     56 	e->magic = VXEMU_MAGIC;
     57 	e->proc = vxp;
     58 	vxp->cpu = &e->cpu;
     59 	e->emuptr = (uint32_t)(intptr_t)e;
     60 	e->etablen = etablen;
     61 	e->etabmask = etablen - 1;
     62 
     63 	// Initialize the entrypoint table and translation buffer pointers
     64 	vxemu_flush(e);
     65 
     66 	vxp->emu = e;
     67 	return 0;
     68 }
     69 
     70 void vxemu_free(vxemu *e)
     71 {
     72 	assert(e->proc->emu == e);
     73 	e->proc->emu = NULL;
     74 
     75 	// Free the vxemu state area
     76 	munmap(e, VXCODEBUFSIZE);
     77 }
     78 
     79 // Reset a vxproc's translation code buffer and entrypoint table.
     80 void vxemu_flush(vxemu *e)
     81 {
     82 	uint32_t i;
     83 
     84 	// Clear the entrypoint table.
     85 	uint32_t etablen = e->etablen;
     86 	for (i = 0; i < etablen; i++) {
     87 		e->etab[i].srceip = NULLSRCEIP;
     88 		e->etab[i].dsteip = NULLDSTEIP;
     89 	}
     90 	e->etabcnt = 0;
     91 
     92 	// The translated code buffer immediately follows the etab.
     93 	e->codebuf = &e->etab[etablen];
     94 	e->codefree = &e->etab[etablen];
     95 	e->codetab = (void*)e + VXCODEBUFSIZE;
     96 	e->codetop = (void*)e + VXCODEBUFSIZE;
     97 
     98 	nflush++;
     99 }
    100 
    101 void vxemu_growetab(struct vxemu *e)
    102 {
    103 	// Increase the size of the entrypoint table,
    104 	// which effectively just reserves more memory
    105 	// from the code translation buffer.
    106 	e->etablen *= 2;
    107 	e->etabmask = e->etablen - 1;
    108 
    109 	// Re-initialize the entrypoint table and translation buffer.
    110 	vxemu_flush(e);
    111 }
    112 
    113 // Each translated frag starts with a one-instruction prolog...
    114 #define PROLOG_LEN		7	// Length of 'mov VSEG:VXEMU_EBX,%ebx'
    115 
    116 
    117 // Translate a block of code starting at the current vx32 EIP.
    118 // The basic procedure works in four stages.
    119 //
    120 // 1: We first scan the instruction stream to build up a
    121 // tentative vxinsn table for the instructions we plan to translate,
    122 // with output code offsets computed for worst-case instruction lengths.
    123 // This pass handles checking execute permissions on instruction pages,
    124 // and decides exactly how many instructions we'll translate in this block.
    125 // The final instruction in a fragment is always either
    126 // an unconditional flow control instruction (JMP, CALL, RET, INT, etc.),
    127 // or the special "pseudo-instruction" VXI_ENDFRAG,
    128 // which ends the fragment with a jump to the appropriate subsequent EIP.
    129 //
    130 // 2: Next we do a reverse scan through the vxinsn table
    131 // to identify instructions we can simplify:
    132 // particularly instructions with condition code fixups
    133 // whose condition codes are not actually used before they are killed.
    134 // We also identify branches that can be rewritten with 8-bit displacements.
    135 // In the process we adjust the target instruction length (dstlen) fields
    136 // for all simplified instructions accordingly.
    137 //
    138 // 3: We now perform a forward scan through the vxinsn table
    139 // to compute the final offsets for all target instructions in the block.
    140 //
    141 // 4: Finally, we scan the instruction stream again
    142 // and emit the target instructions for the block.
    143 //
    144 
    145 // Macros to extract fields in a Mod-Reg-R/M byte
    146 #define EA_MOD(b)	((uint8_t)(b) >> 6)
    147 #define EA_REG(b)	(((uint8_t)(b) >> 3) & 7)
    148 #define EA_RM(b)	((uint8_t)(b) & 7)
    149 
    150 // Scan a Mod-Reg-R/M byte and the rest of the effective address
    151 uint8_t *xscan_rm(uint8_t *inp)
    152 {
    153 	uint8_t ea = *inp++;
    154 	switch (EA_MOD(ea)) {
    155 	case 0:
    156 		switch (EA_RM(ea)) {
    157 		case 4:	; // SIB
    158 			uint8_t sib = *inp;
    159 			if ((sib & 7) == 5)
    160 				return inp+1+4;
    161 			else
    162 				return inp+1;
    163 		case 5:	// disp32
    164 			return inp+4;
    165 		default: // [reg]
    166 			return inp;
    167 		}
    168 
    169 	case 1:
    170 		switch (EA_RM(ea)) {
    171 		case 4:	// SIB+disp8
    172 			return inp+1+1;
    173 		default: // [reg]+disp8
    174 			return inp+1;
    175 		}
    176 
    177 	case 2:
    178 		switch (EA_RM(ea)) {
    179 		case 4: // SIB+disp32
    180 			return inp+1+4;
    181 		default: // [reg]+disp32
    182 			return inp+4;
    183 		}
    184 
    185 	case 3:	// reg
    186 		return inp;
    187 
    188 	default:
    189 		assert(0);
    190 		return 0;
    191 	}
    192 }
    193 
    194 // Translation pass 1:
    195 // scan instruction stream, build preliminary vxinsn table,
    196 // and decide how many instructions to translate in this fragment.
    197 static int xscan(struct vxproc *p)
    198 {
    199 	uint32_t faultva;
    200 	uint32_t eip;
    201 	uint8_t *instart, *inmax;
    202 	struct vxemu *emu = p->emu;
    203 
    204 	// Make sure there's enough space in the translated code buffer;
    205 	// if not, then first clear the code buffer and entrypoint table.
    206 	if (((uint8_t*)emu->codetab - (uint8_t*)emu->codefree) < 1024)
    207 		vxemu_flush(emu);
    208 
    209 	// Grow the entrypoint hash table if it gets too crowded.
    210 	// This also in effect flushes the translated code buffer.
    211 	if (emu->etabcnt > emu->etablen/2)
    212 		vxemu_growetab(emu);
    213 
    214 	// Find and check permissions on the input instruction stream,
    215 	// and determine how far ahead we can scan (up to one full page)
    216 	// before hitting a non-executable page.
    217 	eip = emu->cpu.eip;
    218 	instart = (uint8_t*)emu->mem->base + eip;
    219 	emu->guestfrag = instart;
    220 	if (!vxmem_checkperm(p->mem, eip, 2*VXPAGESIZE, VXPERM_EXEC, &faultva)) {
    221 		if(faultva == eip) {
    222 		noexec:
    223 			emu->cpu_trap = VXTRAP_PAGEFAULT;
    224 			emu->cpu.traperr = 0x10;
    225 			emu->cpu.trapva = faultva;
    226 			return emu->cpu_trap;
    227 		}
    228 	} else
    229 		faultva = VXPAGETRUNC(eip) + 2*VXPAGESIZE;
    230 	inmax = instart + faultva - eip;
    231 
    232 	// Create a new fragment header in the code translation buffer
    233 	struct vxfrag *f = (struct vxfrag*)(((intptr_t)emu->codefree + 3) & ~3);
    234 	emu->txfrag = f;
    235 	f->eip = eip;
    236 
    237 	unsigned ino = 0;	// instruction number
    238 	unsigned dstofs = PROLOG_LEN;
    239 	uint8_t *inp = instart;
    240 	emu->ininst = inp;	// save instruction currently being translated
    241 	int fin = 0;
    242 	do {
    243 		uint8_t itype = 0;
    244 		uint8_t dstlen;
    245 		uint8_t ea;
    246 		
    247 		if(*inp == 0xF0)	// LOCK
    248 			inp++;
    249 
    250 		// Begin instruction decode.
    251 		// We might take a fault on any of these instruction reads
    252 		// if we run off the end of a mapped code page.
    253 		// In that case our exception handler
    254 		// notices that emu->ininst != NULL and initiates recovery.
    255 		// Or we might _not_ take a fault
    256 		// on a page marked read-only but not executable;
    257 		// that's why we check against inmax after each insn.
    258 		switch (*inp++) {
    259 
    260 		// OP Eb,Gb; OP Ev,Gv; OP Gb,Eb; OP Gv,Ev
    261 		case 0x00: case 0x01: case 0x02: case 0x03:	// ADD
    262 		case 0x08: case 0x09: case 0x0a: case 0x0b:	// OR
    263 		case 0x10: case 0x11: case 0x12: case 0x13:	// ADC
    264 		case 0x18: case 0x19: case 0x1a: case 0x1b:	// SBB
    265 		case 0x20: case 0x21: case 0x22: case 0x23:	// AND
    266 		case 0x28: case 0x29: case 0x2a: case 0x2b:	// SUB
    267 		case 0x30: case 0x31: case 0x32: case 0x33:	// XOR
    268 		case 0x38: case 0x39: case 0x3a: case 0x3b:	// CMP
    269 		case 0x84: case 0x85:				// TEST
    270 		case 0x86: case 0x87:				// XCHG
    271 		case 0x88: case 0x89: case 0x8a: case 0x8b:	// MOV
    272 			inp = xscan_rm(inp);
    273 			goto notrans;
    274 
    275 		// OP AL,Ib; PUSH Ib
    276 		case 0x04: case 0x0c: case 0x14: case 0x1c:	// ADD etc.
    277 		case 0x24: case 0x2c: case 0x34: case 0x3c:	// AND etc.
    278 		case 0x6a:					// PUSH Ib
    279 		case 0xa8:					// TEST AL,Ib
    280 		case 0xb0: case 0xb1: case 0xb2: case 0xb3:	// MOV Gb,Ib
    281 		case 0xb4: case 0xb5: case 0xb6: case 0xb7:
    282 			inp += 1;
    283 			goto notrans;
    284 
    285 		// OP EAX,Iv; PUSH Iv; MOV moffs
    286 		case 0x05: case 0x0d: case 0x15: case 0x1d:	// OP EAX,Iv
    287 		case 0x25: case 0x2d: case 0x35: case 0x3d:
    288 		case 0x68:					// PUSH Iv
    289 		case 0xa0: case 0xa1: case 0xa2: case 0xa3:	// MOV moffs
    290 		case 0xa9:					// TEST eAX,Iv
    291 		case 0xb8: case 0xb9: case 0xba: case 0xbb:	// MOV Gv,Iv
    292 		case 0xbc: case 0xbd: case 0xbe: case 0xbf:
    293 			inp += 4;
    294 			goto notrans;
    295 
    296 		// CS and DS segment overrides, only valid for branch hints
    297 		case 0x2e:	// CS/"not taken"
    298 		case 0x3e:	// DS/"taken"
    299 			switch (*inp++) {
    300 
    301 			// Jcc (8-bit displacement)
    302 			case 0x70: case 0x71: case 0x72: case 0x73:
    303 			case 0x74: case 0x75: case 0x76: case 0x77:
    304 			case 0x78: case 0x79: case 0x7a: case 0x7b:
    305 			case 0x7c: case 0x7d: case 0x7e: case 0x7f:
    306 				inp += 1;
    307 				itype = VXI_JUMP;
    308 				dstlen = 7;	// 32-bit branch w/hint
    309 				goto done;
    310 
    311 			// Two-byte opcode
    312 			case 0x0f:
    313 				switch (*inp++) {
    314 
    315 				// Jcc - conditional branch with disp32
    316 				case 0x80: case 0x81: case 0x82: case 0x83:
    317 				case 0x84: case 0x85: case 0x86: case 0x87:
    318 				case 0x88: case 0x89: case 0x8a: case 0x8b:
    319 				case 0x8c: case 0x8d: case 0x8e: case 0x8f:
    320 					inp += 4;
    321 					itype = VXI_JUMP;
    322 					dstlen = 7;	// 32-bit branch w/hint
    323 					goto done;
    324 
    325 				}
    326 				goto invalid;
    327 			}
    328 			goto invalid;
    329 
    330 		// INC reg; DEC reg; PUSH reg; POP reg; XCHG eAX,reg
    331 		case 0x40: case 0x41: case 0x42: case 0x43:	// INC
    332 		case 0x44: case 0x45: case 0x46: case 0x47:
    333 		case 0x48: case 0x49: case 0x4a: case 0x4b:	// DEC
    334 		case 0x4c: case 0x4d: case 0x4e: case 0x4f:
    335 		case 0x50: case 0x51: case 0x52: case 0x53:	// PUSH
    336 		case 0x54: case 0x55: case 0x56: case 0x57:
    337 		case 0x58: case 0x59: case 0x5a: case 0x5b:	// POP
    338 		case 0x5c: case 0x5d: case 0x5e: case 0x5f:
    339 		case 0x90: case 0x91: case 0x92: case 0x93:	// XCHG
    340 		case 0x94: case 0x95: case 0x96: case 0x97:
    341 		case 0x98: case 0x99:				// CWDE, CDQ
    342 		case 0xa4: case 0xa5: case 0xa6: case 0xa7:	// MOVS, CMPS
    343 		case 0xaa: case 0xab:				// STOS
    344 		case 0xac: case 0xad: case 0xae: case 0xaf:	// LODS, SCAS
    345 		case 0xc9:					// LEAVE
    346 		case 0xfc: case 0xfd:				// CLD, STD
    347 			goto notrans;
    348 
    349 		// OP Eb,Ib; OP Ev,Ib; IMUL Gv,Ev,Ib
    350 		case 0x80:					// OP Eb,Ib
    351 		case 0x83:					// OP Ev,Ib
    352 		case 0x6b:					// IMUL Gv,Ev,Ib
    353 			inp = xscan_rm(inp);
    354 			inp += 1;
    355 			goto notrans;
    356 
    357 		// OP Ev,Iv; IMUL Gv,Ev,Iv
    358 		case 0x81:					// OP Ev,Iv
    359 		case 0x69:					// IMUL Gv,Ev,Iv
    360 			inp = xscan_rm(inp);
    361 			inp += 4;
    362 			goto notrans;
    363 
    364 		// Jcc (8-bit displacement)
    365 		case 0x70: case 0x71: case 0x72: case 0x73:
    366 		case 0x74: case 0x75: case 0x76: case 0x77:
    367 		case 0x78: case 0x79: case 0x7a: case 0x7b:
    368 		case 0x7c: case 0x7d: case 0x7e: case 0x7f:
    369 			inp += 1;
    370 			itype = VXI_JUMP;
    371 			dstlen = 6;	// Size of worst-case 32-bit branch
    372 			goto done;
    373 
    374 		// LEA Gv,M
    375 		case 0x8d:
    376 			if (EA_MOD(*inp) == 3)	// Mem-only
    377 				goto invalid;
    378 			inp = xscan_rm(inp);
    379 			goto notrans;
    380 
    381 		// Group 1a - POP Ev
    382 		case 0x8f:
    383 			if (EA_REG(*inp) != 0)
    384 				goto invalid;
    385 			inp = xscan_rm(inp);
    386 			goto notrans;
    387 
    388 		// FWAIT
    389 		case 0x9b:
    390 			if (p->allowfp == 0) {
    391 			badfp:
    392 				if (ino > 0)
    393 					goto endfrag;
    394 				emu->cpu_trap = VXTRAP_FPOFF;
    395 				return emu->cpu_trap;
    396 			}
    397 			goto notrans;
    398 
    399 		// PUSHF; POPF
    400 		case 0x9c: case 0x9d:
    401 			goto notrans;
    402 
    403 		// SAHF; LAHF
    404 		case 0x9f: case 0x9e:
    405 			goto notrans;
    406 
    407 		// Shift Eb,Ib; Shift Ev,Ib
    408 		case 0xc0: case 0xc1:
    409 			inp = xscan_rm(inp);
    410 			inp += 1;
    411 			// XXX fix CCs
    412 			goto notrans;
    413 
    414 		// Shift Eb,1; Shift Ev,1
    415 		case 0xd0: case 0xd1:
    416 			inp = xscan_rm(inp);
    417 			// XXX fix CCs
    418 			goto notrans;
    419 
    420 		// Shift Eb,CL; Shift Ev,CL
    421 		case 0xd2: case 0xd3:
    422 			inp = xscan_rm(inp);
    423 			// XXX fix CCs
    424 			goto notrans;
    425 
    426 		// RET Iw
    427 		case 0xc2:
    428 			inp += 2;
    429 			itype = VXI_RETURN_IMM;
    430 			dstlen = 7+1+6+5;	// movl %ebx,VSEG:VXEMU_EBX
    431 						// popl %ebx
    432 						// addl $Iw,%esp
    433 						// jmp vxrun_lookup_indirect
    434 			fin = 1;
    435 			goto done;
    436 
    437 		// RET
    438 		case 0xc3:
    439 			itype = VXI_RETURN;
    440 			dstlen = 7+1+5;		// movl %ebx,VSEG:VXEMU_EBX
    441 						// popl %ebx
    442 						// jmp vxrun_lookup_indirect
    443 			fin = 1;
    444 			goto done;
    445 
    446 		// Group 11 - MOV Eb,Ib
    447 		case 0xc6:
    448 			if (EA_REG(*inp) != 0)
    449 				goto invalid;
    450 			inp = xscan_rm(inp);
    451 			inp += 1;
    452 			goto notrans;
    453 
    454 		// Group 11 - MOV Ev,Iv
    455 		case 0xc7:
    456 			if (EA_REG(*inp) != 0)
    457 				goto invalid;
    458 			inp = xscan_rm(inp);
    459 			inp += 4;
    460 			goto notrans;
    461 
    462 		// ENTER
    463 		case 0xc8:
    464 			inp += 2+1;		// imm16,imm8
    465 			goto notrans;
    466 
    467 		case 0xcd:			// INT n (software interrupt)
    468 			inp++;
    469 		case 0xcc:			// INT3 (breakpoint)
    470 			goto gentrap;
    471 
    472 		// 387 escapes - modrm with opcode field
    473 		case 0xd8: case 0xd9: case 0xda: case 0xdb:
    474 		case 0xdc: case 0xdd: case 0xde: case 0xdf:
    475 			if (!p->allowfp)
    476 				goto badfp;
    477 			if ((*inp>>6) == 3)
    478 				inp++;
    479 			else
    480 				inp = xscan_rm(inp);
    481 			goto notrans;
    482 
    483 		// Loops
    484 		case 0xe0:	// LOOPNZ cb
    485 			inp++;
    486 			itype = VXI_LOOPNZ;
    487 			dstlen = 3+2+2+5;	// leal -1(ecx), ecx
    488 						// jz .+7
    489 						// jecxz .+5
    490 						// jmp cb
    491 			goto done;
    492 
    493 		case 0xe1:	// LOOPZ cb
    494 			inp++;
    495 			itype = VXI_LOOPZ;
    496 			dstlen = 3+2+2+5;	// leal -1(ecx), ecx
    497 						// jnz .+7
    498 						// jecxz .+5
    499 						// jmp cb
    500 			goto done;
    501 
    502 		case 0xe2:	// LOOP cb
    503 			inp++;
    504 			itype = VXI_LOOP;
    505 			dstlen = 3+2+5;	// leal -1(ecx), ecx
    506 						// jecxz .+5
    507 						// jmp cb
    508 			goto done;
    509 
    510 		// CALL
    511 		case 0xe8:				// CALL Jv
    512 			inp += 4;
    513 			itype = VXI_CALL;
    514 			dstlen = 5+5;		// pushl $nexteip
    515 						// jmp trampoline
    516 			fin = 1;
    517 			goto done;
    518 
    519 		// JMP
    520 		case 0xe9:				// JMP Jv
    521 			inp += 4;
    522 			itype = VXI_JUMP;
    523 			dstlen = 5;	// Size of worst-case 32-bit JMP
    524 			fin = 1;
    525 			goto done;
    526 
    527 		// JMP short
    528 		case 0xeb:				// JMP Jb
    529 			inp += 1;
    530 			itype = VXI_JUMP;
    531 			dstlen = 5;	// Size of worst-case 32-bit JMP
    532 			fin = 1;
    533 			goto done;
    534 
    535 		// Group 3 - unary ops
    536 		case 0xf6:
    537 			ea = *inp;
    538 			inp = xscan_rm(inp);
    539 			switch (EA_REG(ea)) {
    540 			case 0: case 1:			// TEST Eb,Ib
    541 				inp += 1;
    542 			default:			// NOT, NEG, ...
    543 				; // XXX MUL/DIV require fixcc!
    544 			}
    545 			goto notrans;
    546 
    547 		case 0xf7:
    548 			ea = *inp;
    549 			inp = xscan_rm(inp);
    550 			switch (EA_REG(ea)) {
    551 			case 0: case 1:			// TEST Ev,Iv
    552 				inp += 4;
    553 			default:			// NOT, NEG, ...
    554 				; // XXX MUL/DIV require fixcc!
    555 			}
    556 			goto notrans;
    557 
    558 		// Group 4 - INC, DEC
    559 		case 0xfe:
    560 			ea = *inp;
    561 			inp = xscan_rm(inp);
    562 			switch (EA_REG(ea)) {
    563 			case 0: case 1:			// INC Eb, DEC Eb
    564 				goto notrans;
    565 			}
    566 			goto invalid;
    567 
    568 		// Group 5 - INC, DEC, CALL, JMP, PUSH
    569 		case 0xff:
    570 			ea = *inp;
    571 			inp = xscan_rm(inp);
    572 			switch (EA_REG(ea)) {
    573 			case 0: case 1:			// INC Ev, DEC Ev
    574 			case 6:				// PUSH Ev
    575 				goto notrans;
    576 			case 2:				// CALL Ev
    577 				itype = VXI_CALLIND;
    578 				dstlen = 7+(inp-emu->ininst)+5+5;
    579 					// movl %ebx,VSEG:VXEMU_EBX
    580 					// movl <indirect_ea>,%ebx
    581 					//	(same length as CALL inst)
    582 					// pushl $<return_eip>
    583 					// jmp vxrun_lookup_indirect
    584 				fin = 1;
    585 				goto done;
    586 			case 4:				// JMP Ev
    587 				itype = VXI_JUMPIND;
    588 				dstlen = 7+(inp-emu->ininst)+5;
    589 					// movl %ebx,VSEG:VXEMU_EBX
    590 					// movl <indirect_ea>,%ebx
    591 					//	(same length as CALL inst)
    592 					// jmp vxrun_lookup_indirect
    593 				fin = 1;
    594 				goto done;
    595 			}
    596 			goto invalid;
    597 		
    598 		// I/O
    599 		case 0xed:
    600 			goto gentrap;
    601 
    602 		// Prefixes
    603 		case 0x0f:	// 2-byte opcode escape
    604 			goto twobyte;
    605 		case 0x66:	// Operand size prefix
    606 			goto opsize;
    607 		case 0xf3:	// REP/REPE prefix
    608 			goto rep;
    609 		case 0xf2:	// REPNE prefix
    610 			goto repne;
    611 		}
    612 		goto invalid;
    613 
    614 	// Operand size prefix (0x66) seen
    615 	opsize:
    616 		switch (*inp++) {
    617 
    618 		// OP Ev,Gv; OP Gv,Ev
    619 		case 0x01: case 0x03:				// ADD
    620 		case 0x09: case 0x0b:				// OR
    621 		case 0x11: case 0x13:				// ADC
    622 		case 0x19: case 0x1b:				// SBB
    623 		case 0x21: case 0x23:				// AND
    624 		case 0x29: case 0x2b:				// SUB
    625 		case 0x31: case 0x33:				// XOR
    626 		case 0x39: case 0x3b:				// CMP
    627 		case 0x85:					// TEST
    628 		case 0x87:					// XCHG
    629 		case 0x89: case 0x8b:				// MOV
    630 			inp = xscan_rm(inp);
    631 			goto notrans;
    632 
    633 		// OP EAX,Iv; PUSH Iv
    634 		case 0x05: case 0x0d: case 0x15: case 0x1d:	// OP EAX,Iv
    635 		case 0x25: case 0x2d: case 0x35: case 0x3d:
    636 		case 0x68:					// PUSH Iv
    637 		case 0xa9:					// TEST eAX,Iv
    638 		case 0xb8: case 0xb9: case 0xba: case 0xbb:	// MOV Gv,Iv
    639 		case 0xbc: case 0xbd: case 0xbe: case 0xbf:
    640 			inp += 2;
    641 			goto notrans;
    642 
    643 		// INC reg; DEC reg; PUSH reg; POP reg; XCHG eAX,reg
    644 		case 0x40: case 0x41: case 0x42: case 0x43:	// INC
    645 		case 0x44: case 0x45: case 0x46: case 0x47:
    646 		case 0x48: case 0x49: case 0x4a: case 0x4b:	// DEC
    647 		case 0x4c: case 0x4d: case 0x4e: case 0x4f:
    648 		case 0x50: case 0x51: case 0x52: case 0x53:	// PUSH
    649 		case 0x54: case 0x55: case 0x56: case 0x57:
    650 		case 0x58: case 0x59: case 0x5a: case 0x5b:	// POP
    651 		case 0x5c: case 0x5d: case 0x5e: case 0x5f:
    652 		case 0x90: case 0x91: case 0x92: case 0x93:	// XCHG
    653 		case 0x94: case 0x95: case 0x96: case 0x97:
    654 		case 0x98: case 0x99:				// CWDE, CDQ
    655 		case 0xa4: case 0xa5: case 0xa6: case 0xa7:	// MOVS, CMPS
    656 		case 0xaa: case 0xab:				// STOS
    657 		case 0xac: case 0xad: case 0xae: case 0xaf:	// LODS, SCAS
    658 		case 0xc9:					// LEAVE
    659 		case 0xfc: case 0xfd:				// CLD, STD
    660 			goto notrans;
    661 
    662 		// OP Ev,Iv; IMUL Gv,Ev,Iv
    663 		case 0x81:					// OP Ev,Iv
    664 		case 0x69:					// IMUL Gv,Ev,Iv
    665 			inp = xscan_rm(inp);
    666 			inp += 2;
    667 			goto notrans;
    668 
    669 		// OP Ev,Ib; IMUL Gv,Ev,Ib
    670 		case 0x83:					// OP Ev,Ib
    671 		case 0x6b:					// IMUL Gv,Ev,Ib
    672 			inp = xscan_rm(inp);
    673 			inp += 1;
    674 			goto notrans;
    675 
    676 		// MOV moffs
    677 		case 0xa1: case 0xa3:
    678 			inp += 4;	// always 32-bit offset
    679 			goto notrans;
    680 
    681 		// Shift Ev,Ib
    682 		case 0xc1:
    683 			inp = xscan_rm(inp);
    684 			inp += 1;
    685 			// XXX fix CCs
    686 			goto notrans;
    687 
    688 		// Shift Ev,1
    689 		case 0xd1:
    690 			inp = xscan_rm(inp);
    691 			// XXX fix CCs
    692 			goto notrans;
    693 
    694 		// Shift Ev,CL
    695 		case 0xd3:
    696 			inp = xscan_rm(inp);
    697 			// XXX fix CCs
    698 			goto notrans;
    699 
    700 		// Group 11 - MOV Ev,Iv
    701 		case 0xc7:
    702 			if (EA_REG(*inp) != 0)
    703 				goto invalid;
    704 			inp = xscan_rm(inp);
    705 			inp += 2;
    706 			goto notrans;
    707 		
    708 		// Group 3 - unary ops
    709 		case 0xf7:
    710 			ea = *inp;
    711 			inp = xscan_rm(inp);
    712 			switch (EA_REG(ea)) {
    713 			case 0: case 1:			// TEST Ev,Iv
    714 				inp += 2;
    715 			default:			// NOT, NEG, ...
    716 				; // XXX MUL/DIV require fixcc!
    717 			}
    718 			goto notrans;
    719 
    720 		// Group 5 - INC, DEC, CALL, JMP, PUSH
    721 		case 0xff:
    722 			ea = *inp;
    723 			inp = xscan_rm(inp);
    724 			switch (EA_REG(ea)) {
    725 			case 0: case 1:			// INC Ev, DEC Ev
    726 				goto notrans;
    727 			}
    728 			goto invalid;
    729 
    730 		// Prefixes
    731 		case 0x0f:	// 2-byte opcode escape
    732 			goto twobyte_opsize;
    733 		case 0x66:	// Operand size prefix (redundant)
    734 			goto invalid;
    735 		case 0xf3:	// REP/REPE prefix
    736 			goto opsize_rep;
    737 		case 0xf2:	// REPNE prefix
    738 			goto opsize_repne;
    739 		}
    740 		goto invalid;
    741 
    742 	// REP/REPE prefix (0xf3) seen
    743 	rep:
    744 		switch (*inp++) {
    745 
    746 		// No-operand insns
    747 		case 0xa4: case 0xa5: case 0xa6: case 0xa7:	// MOVS, CMPS
    748 		case 0xaa: case 0xab:				// STOS
    749 		case 0xac: case 0xad: case 0xae: case 0xaf:	// LODS, SCAS
    750 			goto notrans;
    751 
    752 		// Prefixes
    753 		case 0x0f:	// 2-byte opcode escape
    754 			goto twobyte_rep;
    755 		case 0x66:	// Operand size prefix
    756 			goto opsize_rep;
    757 		case 0xf3:	// REP/REPE prefix (redundant)
    758 			goto invalid;
    759 		case 0xf2:	// REPNE prefix (conflicting)
    760 			goto invalid;
    761 		}
    762 		goto invalid;
    763 
    764 	// REPNE prefix (0xf2) seen
    765 	repne:
    766 		switch (*inp++) {
    767 
    768 		// No-operand insns
    769 		case 0xa6: case 0xa7:				// CMPS
    770 		case 0xae: case 0xaf:				// SCAS
    771 			goto notrans;
    772 
    773 		// Prefixes
    774 		case 0x0f:	// 2-byte opcode escape
    775 			goto twobyte_repne;
    776 		case 0x66:	// Operand size prefix
    777 			goto opsize_repne;
    778 		case 0xf3:	// REP/REPE prefix (conflicting)
    779 			goto invalid;
    780 		case 0xf2:	// REPNE prefix (redundant)
    781 			goto invalid;
    782 		}
    783 		goto invalid;
    784 
    785 
    786 	// Operand size prefix (0x66) and REP/REPE prefix (0xf3) seen
    787 	opsize_rep:
    788 		switch (*inp++) {
    789 		case 0xa5: case 0xa7:				// MOVS, CMPS
    790 		case 0xab:					// STOS
    791 		case 0xad: case 0xaf:				// LODS, SCAS
    792 			goto notrans;
    793 		}
    794 		goto invalid;
    795 
    796 	// Operand size prefix (0x66) and REPNE prefix (0xf2) seen
    797 	opsize_repne:
    798 		switch (*inp++) {
    799 		case 0xa7:					// CMPS
    800 		case 0xaf:					// SCAS
    801 			goto notrans;
    802 		}
    803 		goto invalid;
    804 
    805 
    806 	twobyte:
    807 		switch (*inp++) {
    808 
    809 		// SYSCALL instruction for fast system calls
    810 		case 0x05:
    811 			goto gentrap;
    812 
    813 		// No additional operand
    814 		case 0xc8: case 0xc9: case 0xca: case 0xcb:	// BSWAP
    815 		case 0xcc: case 0xcd: case 0xce: case 0xcf:
    816 			goto notrans;
    817 
    818 		// General EA operands
    819 		case 0x10: case 0x11:			// MOVUPS
    820 		case 0x12:				// MOVLPS Vps,Mq/MOVHLPS
    821 		case 0x14: case 0x15:			// UNPCKLPS/UNPCKHPS
    822 		case 0x16:				// MOVHPS Vps,Mq/MOVLHPS
    823 		case 0x28: case 0x29:			// MOVAPS
    824 		case 0x2e: case 0x2f:			// UCOMISS/COMISS
    825 		case 0x40: case 0x41: case 0x42: case 0x43:	// CMOVcc
    826 		case 0x44: case 0x45: case 0x46: case 0x47:
    827 		case 0x48: case 0x49: case 0x4a: case 0x4b:
    828 		case 0x4c: case 0x4d: case 0x4e: case 0x4f:
    829 		case 0x51:					// SQRTPS
    830 		case 0x54: case 0x55: case 0x56: case 0x57:	// ANDPS etc.
    831 		case 0x58: case 0x59: case 0x5a: case 0x5b:	// ADDPS etc.
    832 		case 0x5c: case 0x5d: case 0x5e: case 0x5f:	// SUBPS etc.
    833 		case 0xa3:					// BT Ev,Gv
    834 		case 0xab:					// BTS Ev,Gv
    835 		case 0xaf:					// IMUL Gv,Ev
    836 		case 0xb0:					// CMPXCHG Eb,Gb
    837 		case 0xb1:					// CMPXCHG Ev,Gv
    838 		case 0xb3:					// BTR Ev,Gv
    839 		case 0xb6: case 0xb7:				// MOVZX
    840 		case 0xbb:					// BTC Ev,Gv
    841 		case 0xbc: case 0xbd:				// BSF, BSR
    842 		case 0xbe: case 0xbf:				// MOVSX
    843 		case 0xc0:					// XADD Eb,Gb
    844 		case 0xc1:					// XADD Ev,Gv
    845 			inp = xscan_rm(inp);
    846 			goto notrans;
    847 
    848 		// General EA operands plus immediate byte
    849 		case 0xc2:				// CMPPS Vps,Wps,Ib
    850 		case 0xc6:				// SHUFPS Vps,Wps,Ib
    851 			inp = xscan_rm(inp);
    852 			inp += 1;
    853 			goto notrans;
    854 
    855 		// Memory-only EA operand
    856 		case 0x13:				// MOVLPS Mq,Vps
    857 		case 0x17:				// MOVHPS Mq,Vps
    858 		case 0x2b:				// MOVNTPS
    859 		case 0xc3:				// MOVNTI Md,Gd
    860 			if (EA_MOD(*inp) == 3)	// Mem-only
    861 				goto invalid;
    862 			inp = xscan_rm(inp);
    863 			goto notrans;
    864 
    865 		// Register-only EA operand
    866 		case 0x50:				// MOVMSKPS
    867 			if (EA_MOD(*inp) != 3)	// Reg-only
    868 				goto invalid;
    869 			inp = xscan_rm(inp);
    870 			goto notrans;
    871 
    872 		// Jcc - conditional branch with disp32
    873 		case 0x80: case 0x81: case 0x82: case 0x83:
    874 		case 0x84: case 0x85: case 0x86: case 0x87:
    875 		case 0x88: case 0x89: case 0x8a: case 0x8b:
    876 		case 0x8c: case 0x8d: case 0x8e: case 0x8f:
    877 			inp += 4;
    878 			itype = VXI_JUMP;
    879 			dstlen = 6;	// Size of worst-case 32-bit branch
    880 			goto done;
    881 
    882 		// SETcc - set byte based on condition
    883 		case 0x90: case 0x91: case 0x92: case 0x93:
    884 		case 0x94: case 0x95: case 0x96: case 0x97:
    885 		case 0x98: case 0x99: case 0x9a: case 0x9b:
    886 		case 0x9c: case 0x9d: case 0x9e: case 0x9f:
    887 			if (EA_REG(*inp) != 0)
    888 				goto invalid;
    889 			inp = xscan_rm(inp);
    890 			goto notrans;
    891 
    892 		// Shift instructions
    893 		case 0xa4:					// SHLD Ev,Gv,Ib
    894 		case 0xac:					// SHRD Ev,Gv,Ib
    895 			inp = xscan_rm(inp);
    896 			inp += 1;
    897 			// XXX fix cc
    898 			goto notrans;
    899 		case 0xa5:					// SHLD Ev,Gv,CL
    900 		case 0xad:					// SHRD Ev,Gv,CL
    901 			inp = xscan_rm(inp);
    902 			// XXX fix cc
    903 			goto notrans;
    904 
    905 		// Group 8 - Bit test/modify with immediate
    906 		case 0xba:
    907 			if (!(EA_REG(*inp) & 4))
    908 				goto invalid;
    909 			inp = xscan_rm(inp);
    910 			inp += 1;
    911 			goto invalid;
    912 
    913 		// Group 15 - SSE control
    914 		case 0xae:
    915 			ea = *inp;
    916 			inp = xscan_rm(inp);
    917 			switch (EA_REG(ea)) {
    918 			case 2:					// LDMXCSR
    919 			case 3:					// STMXCSR
    920 				if (EA_MOD(ea) == 3)	// Mem-only
    921 					goto invalid;
    922 				goto notrans;
    923 			// XX LFENCE, SFENCE, MFENCE?
    924 			}
    925 			goto invalid;
    926 
    927 		// Group 16 - PREFETCH
    928 		case 0x18:
    929 			if (EA_MOD(*inp) == 3)	// Mem-only
    930 				goto invalid;
    931 			// XX Squash to NOP if EA_REG(*inp) > 3?
    932 			inp = xscan_rm(inp);
    933 			goto notrans;
    934 
    935 		}
    936 		goto invalid;
    937 
    938 	twobyte_opsize:
    939 		switch (*inp++) {
    940 
    941 		// General EA operands
    942 		case 0x10: case 0x11:			// MOVUPD
    943 		case 0x14: case 0x15:			// UNPCKLPD/UNPCKHPD
    944 		case 0x28: case 0x29:			// MOVAPD
    945 		case 0x2e: case 0x2f:			// UCOMISD/COMISD
    946 		case 0x40: case 0x41: case 0x42: case 0x43:	// CMOVcc
    947 		case 0x44: case 0x45: case 0x46: case 0x47:
    948 		case 0x48: case 0x49: case 0x4a: case 0x4b:
    949 		case 0x4c: case 0x4d: case 0x4e: case 0x4f:
    950 		case 0x51:					// SQRTPD
    951 		case 0x54: case 0x55: case 0x56: case 0x57:	// ANDPD etc.
    952 		case 0x58: case 0x59: case 0x5a: case 0x5b:	// ADDPD etc.
    953 		case 0x5c: case 0x5d: case 0x5e: case 0x5f:	// SUBPD etc.
    954 		case 0x60: case 0x61: case 0x62: case 0x63:	// PUNPCK...
    955 		case 0x64: case 0x65: case 0x66: case 0x67:	// PCMPGT...
    956 		case 0x68: case 0x69: case 0x6a: case 0x6b:	// PUNPCK...
    957 		case 0x6c: case 0x6d: case 0x6e: case 0x6f:	// PUNPCK...
    958 		case 0x74: case 0x75: case 0x76:		// PCMPEQ...
    959 		case 0x7e: case 0x7f:				// MOVD/MOVDQA
    960 		case 0xa3:					// BT Ev,Gv
    961 		case 0xab:					// BTS Ev,Gv
    962 		case 0xb3:					// BTR Ev,Gv
    963 		case 0xbb:					// BTC Ev,Gv
    964 		case 0xbc: case 0xbd:				// BSF, BSR
    965 		case 0xaf:					// IMUL Gv,Ev
    966 		case 0xb6:					// MOVZX Gv,Eb
    967 		case 0xbe:					// MOVSX Gv,Eb
    968 		case 0xd1: case 0xd2: case 0xd3:		// PSRLx
    969 		case 0xd4: case 0xd5: case 0xd6:		// PADDQ...
    970 		case 0xd8: case 0xd9: case 0xda: case 0xdb:	// PSUBUSB...
    971 		case 0xdc: case 0xdd: case 0xde: case 0xdf:	// PADDUSB...
    972 		case 0xe0: case 0xe1: case 0xe2: case 0xe3:	// PAVGB...
    973 		case 0xe4: case 0xe5: case 0xe6:		// PMULHUW...
    974 		case 0xe8: case 0xe9: case 0xea: case 0xeb:	// PSUBSB...
    975 		case 0xec: case 0xed: case 0xee: case 0xef:	// PADDSB...
    976 		case 0xf1: case 0xf2: case 0xf3:		// PSLLx
    977 		case 0xf4: case 0xf5: case 0xf6:		// PMULUDQ...
    978 		case 0xf8: case 0xf9: case 0xfa: case 0xfb:	// PSUBB...
    979 		case 0xfc: case 0xfd: case 0xfe:		// PADDB...
    980 			inp = xscan_rm(inp);
    981 			goto notrans;
    982 
    983 		// General EA operands plus immediate byte
    984 		case 0xc5:				// PEXTRW Gd,VRdq,Ib
    985 			if (EA_MOD(*inp) != 3)
    986 				goto invalid; // Reg-only
    987 		case 0x70:				// PSHUFD Vdq,Wdq,Ib
    988 		case 0xc2:				// CMPPD Vps,Wps,Ib
    989 		case 0xc4:				// PINSRW Vdq,Ew,Ib
    990 		case 0xc6:				// SHUFPD Vps,Wps,Ib
    991 			inp = xscan_rm(inp);
    992 			inp += 1;
    993 			goto notrans;
    994 
    995 		// Memory-only EA operand
    996 		case 0x12: case 0x13:			// MOVLPD
    997 		case 0x16: case 0x17:			// MOVHPD
    998 		case 0x2b:				// MOVNTPD
    999 		case 0xe7:				// MOVNTDQ Mdq,Vdq
   1000 			if (EA_MOD(*inp) == 3)		// Mem-only
   1001 				goto invalid;
   1002 			inp = xscan_rm(inp);
   1003 			goto notrans;
   1004 
   1005 		// Register-only EA operand
   1006 		case 0x50:				// MOVMSKPD
   1007 		case 0xd7:				// PMOVMSKB Gd,VRdq
   1008 		case 0xf7:				// MASKMOVQ Vdq,Wdq
   1009 			if (EA_MOD(*inp) != 3)		// Reg-only
   1010 				goto invalid;
   1011 			inp = xscan_rm(inp);
   1012 			goto notrans;
   1013 
   1014 		// Shift instructions
   1015 		case 0xa4:					// SHLD Ev,Gv,Ib
   1016 		case 0xac:					// SHRD Ev,Gv,Ib
   1017 			inp = xscan_rm(inp);
   1018 			inp += 1;
   1019 			// XXX fix cc
   1020 			goto notrans;
   1021 		case 0xa5:					// SHLD Ev,Gv,CL
   1022 		case 0xad:					// SHRD Ev,Gv,CL
   1023 			inp = xscan_rm(inp);
   1024 			// XXX fix cc
   1025 			goto notrans;
   1026 
   1027 		// Group 8 - Bit test/modify with immediate
   1028 		case 0xba:
   1029 			if (!(EA_REG(*inp) & 4))
   1030 				goto invalid;
   1031 			inp = xscan_rm(inp);
   1032 			inp += 1;
   1033 			goto invalid;
   1034 
   1035 		// Group 12, 13, 14 - SSE vector shift w/ immediate
   1036 		case 0x71: case 0x72: case 0x73:
   1037 			ea = *inp;
   1038 			inp = xscan_rm(inp);
   1039 			switch (EA_REG(ea)) {
   1040 			case 2: case 4: case 6:
   1041 				inp += 1;
   1042 				goto notrans;
   1043 			}
   1044 			goto invalid;
   1045 		}
   1046 		goto invalid;
   1047 
   1048 	twobyte_rep:
   1049 		switch (*inp++) {
   1050 
   1051 		// General EA operands
   1052 		case 0x10: case 0x11:				// MOVSS
   1053 		case 0x2a: case 0x2c: case 0x2d:		// CVT...
   1054 		case 0x51:					// SQRTSS
   1055 		case 0x58: case 0x59: case 0x5a: case 0x5b:	// ADDSS etc.
   1056 		case 0x5c: case 0x5d: case 0x5e: case 0x5f:	// SUBSS etc.
   1057 		case 0x6f:					// MOVDQU
   1058 		case 0x7e: case 0x7f:				// MOVQ/MOVDQU
   1059 		case 0xe6:					// CVTDQ2PD
   1060 			inp = xscan_rm(inp);
   1061 			goto notrans;
   1062 
   1063 		// General EA operands plus immediate byte
   1064 		case 0x70:				// PSHUFHW Vq,Wq,Ib
   1065 		case 0xc2:				// CMPSS Vss,Wss,Ib
   1066 			inp = xscan_rm(inp);
   1067 			inp += 1;
   1068 			goto notrans;
   1069 		}
   1070 		goto invalid;
   1071 
   1072 	twobyte_repne:
   1073 		switch (*inp++) {
   1074 
   1075 		// General EA operands
   1076 		case 0x10: case 0x11:				// MOVSD
   1077 		case 0x2a: case 0x2c: case 0x2d:		// CVT...
   1078 		case 0x51:					// SQRTSD
   1079 		case 0x58: case 0x59: case 0x5a:		// ADDSD etc.
   1080 		case 0x5c: case 0x5d: case 0x5e: case 0x5f:	// SUBSD etc.
   1081 		case 0xe6:					// CVTPD2DQ
   1082 			inp = xscan_rm(inp);
   1083 			goto notrans;
   1084 
   1085 		// General EA operands plus immediate byte
   1086 		case 0x70:				// PSHUFLW Vq,Wq,Ib
   1087 		case 0xc2:				// CMPSD Vss,Wss,Ib
   1088 			inp = xscan_rm(inp);
   1089 			inp += 1;
   1090 			goto notrans;
   1091 		}
   1092 		goto invalid;
   1093 
   1094 
   1095 	invalid:
   1096 		vxrun_cleanup(emu);
   1097 		vxprint("invalid opcode %02x %02x %02x at eip %08x\n",
   1098 			emu->ininst[0], emu->ininst[1], emu->ininst[2],
   1099 			emu->cpu.eip + (emu->ininst - instart));
   1100 		vxrun_setup(emu);
   1101 	gentrap:
   1102 		fin = 1;
   1103 		itype = VXI_TRAP;
   1104 		dstlen = 6+5+11+5;	// movl %eax,VSEG:VXEMU_EAX
   1105 					// movl $fin,%eax
   1106 					// movl $eip,VSEG:VXEMU_EIP
   1107 					// jmp vxrun_gentrap
   1108 		goto done;
   1109 
   1110 
   1111 	notrans:
   1112 		// No translation of this instruction is required -
   1113 		// dstlen is the same as srclen.
   1114 		dstlen = inp - emu->ininst;
   1115 
   1116 	done:
   1117 		// Make sure this whole instruction was actually executable
   1118 		if (inp > inmax) {
   1119 			// If the whole first instruction isn't executable,
   1120 			// then just generate the trap immediately,
   1121 			// since we know it'll be required.
   1122 			if (ino == 0)
   1123 				goto noexec;
   1124 
   1125 			// Otherwise, just roll back
   1126 			// and stop translating before this instruction,
   1127 			// and let the exception (if any)
   1128 			// happen next time into the translator.
   1129 			goto endfrag;
   1130 		}
   1131 
   1132 		// Make sure there's actually room for the resulting code
   1133 		if (dstofs + dstlen > VXDSTOFS_MAX) {
   1134 
   1135 			// Roll back and end the frag before this instruction
   1136 			endfrag:
   1137 			fin = 1;
   1138 			itype = VXI_ENDFRAG;
   1139 			inp = emu->ininst;	// no source consumed
   1140 			dstlen = 5;		// jmp to next frag
   1141 		}
   1142 
   1143 		// Record the instruction record
   1144 		f->insn[ino].itype = itype;
   1145 		f->insn[ino].srcofs = emu->ininst - instart;
   1146 		f->insn[ino].dstofs = dstofs;
   1147 		f->insn[ino].dstlen = dstlen;
   1148 
   1149 		// Move on to next instruction
   1150 		ino++;
   1151 		emu->ininst = inp;
   1152 		dstofs += dstlen;
   1153 
   1154 	} while (!fin);
   1155 
   1156 	// Record the total number of instructions for this frag
   1157 	f->ninsn = ino;
   1158 	
   1159 // vxprint("%d ins - to %x\n", ino, emu->ininst - instart + eip);
   1160 	// Clear the special instruction-scanning exception state flag
   1161 	emu->guestfragend = emu->ininst;
   1162 	emu->ininst = NULL;
   1163 
   1164 	return 0;
   1165 }
   1166 
   1167 // Try to optimize jump instructions whose target
   1168 // is in the same fragment we're building.
// Pass-2 helper: if instruction ino is a jump/branch/loop whose target
// lies inside the fragment currently being built, and the (estimated)
// translated displacement fits in a signed byte, downgrade it to the
// short VXI_JUMP8 form so pass 4 emits a 2-byte (or 3-byte, with hint
// prefix) jump instead of the long trampoline form.
static inline void xsimp_jump(struct vxproc *p, unsigned ino)
{
	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;
	unsigned ninsn = f->ninsn;
	unsigned srcofs = f->insn[ino].srcofs;
	// Pointer to the original guest instruction bytes.
	uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;

	// Skip any branch prediction hint prefix (0x2e/0x3e);
	// the short translated form is then 3 bytes instead of 2.
	uint8_t opcode = *inp++;
	int dstlen = 2;
	uint32_t targofs = srcofs;
	if (opcode == 0x2e || opcode == 0x3e) {
		opcode = *inp++;
		dstlen = 3;
		targofs++;
	}

	// Determine the jump target as an offset from the fragment start:
	// instruction length plus its signed relative displacement.
	if (opcode == 0xe9) {
		// 32-bit JMP
		targofs += 5 + *(int32_t*)inp;
	} else if (opcode == 0x0f) {
		// 32-bit Jcc
		targofs += 6 + *(int32_t*)inp;
	} else {
		// 8-bit JMP or Jcc or LOOP
		targofs += 2 + (int32_t)(int8_t)*inp;
	}
	if (targofs > f->insn[ninsn-1].srcofs)
		return;		// Target is not in this fragment

	// Find the target in the insn table (binary search for the
	// greatest srcofs <= targofs).
	unsigned lo = 0;
	unsigned hi = ninsn-1;
	while (hi > lo) {
		unsigned mid = (lo + hi + 1) / 2;
		unsigned midofs = f->insn[mid].srcofs;
		if (targofs >= midofs)
			lo = mid;
		else
			hi = mid - 1;
	}
	if (targofs != f->insn[lo].srcofs)
		return;		// Jump target is _between_ instructions!

	// Make sure target is still in range after translation.
	// NOTE(review): dstofs here are pass-1 placements; xplace
	// recomputes final offsets after later instructions may have
	// shrunk, so the +127 / +3-128 margins appear deliberately
	// conservative — confirm against xplace before tightening.
	if (lo > ino) {
		if ((int)f->insn[lo].dstofs >
				(int)f->insn[ino+1].dstofs+127)
			return;	// too far ahead
	} else {
		if ((int)f->insn[lo].dstofs <
				(int)f->insn[ino].dstofs+3-128)
			return;	// too far behind
	}

	// In range - convert it to an 8-bit jump!
	f->insn[ino].itype = VXI_JUMP8;
	f->insn[ino].dstlen = dstlen;
}
   1230 
   1231 // Translation pass 2:
   1232 // Reverse scan through the instruction table trying to simplify instructions.
   1233 static void xsimp(struct vxproc *p)
   1234 {
   1235 	int i;
   1236 	struct vxemu *emu = p->emu;
   1237 	struct vxfrag *f = emu->txfrag;
   1238 	unsigned ninsn = f->ninsn;
   1239 
   1240 	for (i = ninsn-1; i >= 0; i--) {
   1241 		unsigned itype = f->insn[i].itype;
   1242 
   1243 		switch (itype) {
   1244 		case VXI_LOOP:
   1245 		case VXI_LOOPZ:
   1246 		case VXI_LOOPNZ:
   1247 		case VXI_JUMP:
   1248 			xsimp_jump(p, i);
   1249 			break;
   1250 		default:
   1251 			break;	// no simplifications
   1252 		}
   1253 
   1254 	}
   1255 }
   1256 
   1257 // Translation pass 3:
   1258 // Compute final instruction offsets.
   1259 static void xplace(struct vxproc *p)
   1260 {
   1261 	int i;
   1262 	struct vxemu *emu = p->emu;
   1263 	struct vxfrag *f = emu->txfrag;
   1264 	unsigned ninsn = f->ninsn;
   1265 
   1266 	size_t outofs = PROLOG_LEN;
   1267 	for (i = 0; i < ninsn; i++) {
   1268 		f->insn[i].dstofs = outofs;
   1269 		outofs += f->insn[i].dstlen;
   1270 	}
   1271 }
   1272 
   1273 // Emit a direct 32-bit jump/branch/call/endfrag instruction.
   1274 // The original jump might have been either short or long.
   1275 // NB. vxemu_sighandler (sig.c) knows that jumps don't trash registers.
   1276 // NB. vxemu_sighandler knows that calls push the return address 
   1277 // onto the stack as the first instruction, and that the target address
   1278 // can be found at offset 26 of the translation.
// Emit the translation of a direct control transfer (JMP/Jcc/CALL/LOOP*
// or the ENDFRAG pseudo-instruction).  The emitted instruction initially
// targets a per-jump trampoline allocated from *extrap; the trampoline
// records a patch record and jumps to vxrun_lookup_backpatch, which can
// later rewrite the jump to point directly at the translated target.
// *extrap is advanced past the trampoline before returning.
static inline void xemit_jump(
		struct vxproc *p, uint8_t itype, unsigned ino,
		uint8_t **extrap)
{
	extern void vxrun_lookup_backpatch();

	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;

	// Determine the jump target EIP
	// and emit the appropriate call/jump/branch instruction,
	// with its target pointing to a temporary jump trampoline.
	uint8_t *tramp = *extrap;
	unsigned srcofs = f->insn[ino].srcofs;
	uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
	uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;
	uint32_t targeip = emu->cpu.eip + srcofs;
	if (itype == VXI_JUMP) {

		uint8_t opcode = *inp;

		// Copy any branch taken/not taken hint prefix
		if (opcode == 0x2e || opcode == 0x3e) {
			*outp++ = opcode;
			opcode = *++inp;
			targeip++;
		}

		// Emit the branch/jump/call instruction
		switch (opcode) {

		case 0xe9:	// was a 32-bit JMP
			targeip += 5 + *(int32_t*)&inp[1];
			goto emitjmp;

		case 0xeb:	// was an 8-bit JMP
			targeip += 2 + (int32_t)(int8_t)inp[1];
		emitjmp:
			outp[0] = 0xe9;		// always emit 32-bit JMP
			*(int32_t*)&outp[1] = (int32_t)(tramp - (outp+5));
			outp += 5;
			break;

		case 0x0f:	// was a 32-bit Jcc
			opcode = inp[1];
			targeip += 6 + *(int32_t*)&inp[2];
			goto emitjcc;

		default:	// was an 8-bit Jcc
			// Short Jcc opcodes are 0x70+cc; the long form is
			// 0x0f 0x80+cc, so adding 0x10 yields the second
			// byte of the equivalent 32-bit branch.
			opcode = inp[0] + 0x10;
			targeip += 2 + (int32_t)(int8_t)inp[1];
		emitjcc:
			outp[0] = 0x0f;		// always emit 32-bit Jcc
			outp[1] = opcode;
			*(int32_t*)&outp[2] = (int32_t)(tramp - (outp+6));
			outp += 6;
			break;
		}
	} else if (itype == VXI_CALL) {
		assert(*inp == 0xe8);	// 32-bit CALL
		
		// A call is rewritten as "push guest return EIP; jmp target".
		outp[0] = 0x68;		// pushl $<return_eip>
		*(uint32_t*)&outp[1] = targeip + 5;
		outp += 5;
		targeip += 5 + *(int32_t*)&inp[1];
		goto emitjmp;
	} else if (itype == VXI_LOOP || itype == VXI_LOOPZ || itype == VXI_LOOPNZ) {
		// LOOP family: decrement ecx with lea (which leaves the
		// flags untouched, unlike dec), then fall through to the
		// jump only when the loop condition holds.
		*outp++ = 0x8d;	// leal -1(ecx) -> ecx
		*outp++ = 0x49;
		*outp++ = 0xff;
		if (itype == VXI_LOOPZ) {
			*outp++ = 0x75;	// jnz .+7
			*outp++ = 0x07;
		} else if (itype == VXI_LOOPNZ) {
			*outp++ = 0x74;	// jz .+7
			*outp++ = 0x07;
		}
		*outp++ = 0xe3;	// jecxz .+5
		*outp++ = 0x05;
		targeip += 2 + (int32_t)(int8_t)inp[1];
		goto emitjmp;
	} else {
		// End-of-fragment pseudo-instruction.
		// targeip already points to the eip we wish to "jump" to.
		assert(itype == VXI_ENDFRAG);
		goto emitjmp;
	}

	// Emit the trampoline code.
	// Layout: 11 bytes storing the patch-record address into
	// VSEG:VXEMU_JMPINFO, a 5-byte jmp to vxrun_lookup_backpatch,
	// then the 8-byte patch record: target guest eip and the host
	// address just past the emitted jump (used for backpatching).
	tramp[0] = VSEGPREFIX;		// movl $patchrec,VSEG:VXEMU_JMPINFO
	tramp[1] = 0xc7;
	tramp[2] = 0x05;
	*(uint32_t*)&tramp[3] = offsetof(vxemu,jmpinfo);
	*(uint32_t*)&tramp[7] = (uint32_t)((intptr_t)tramp+11+5 -
						(intptr_t)emu);

	tramp[11+0] = 0xe9;		// jmp vxrun_lookup_backpatch
	*(uint32_t*)&tramp[11+1] = (uint32_t)((intptr_t)vxrun_lookup_backpatch
					- (intptr_t)&tramp[11+5]);

	*(uint32_t*)&tramp[11+5] = targeip;		// .long targeip
	*(uint32_t*)&tramp[11+5+4] = (uint32_t)(intptr_t)outp; // .long jmpend
	*extrap = &tramp[11+5+4+4];
}
   1383 
   1384 // Emit a short (8-bit) jump/branch instruction.
   1385 // The original branch might have been either short or long.
   1386 // NB. vxemu_sighandler (sig.c) knows that jump8s don't
   1387 // trash registers.
// Emit a short (8-bit displacement) jump for an instruction that pass 2
// proved targets another instruction inside this same fragment.
// The original guest branch may have been either the short or long form.
static inline void xemit_jump8(struct vxproc *p, unsigned ino)
{
	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;
	unsigned srcofs = f->insn[ino].srcofs;
	uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
	uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;

	// Copy any branch taken/not taken hint prefix
	uint8_t opcode = *inp;
	int outlen = 2;
	uint32_t targofs = srcofs;
	if (opcode == 0x2e || opcode == 0x3e) {
		*outp++ = opcode;
		opcode = *++inp;
		outlen = 3;
		targofs++;
	}

	// Determine the jump target and output opcode.
	switch (opcode) {
	case 0xe9:	// 32-bit JMP
		opcode = 0xeb;
		targofs += 5 + *(int32_t*)&inp[1];
		break;
	case 0x0f:	// 32-bit Jcc
		// Long Jcc is 0x0f 0x80+cc; subtracting 0x10 from the
		// second byte gives the short 0x70+cc opcode.
		opcode = inp[1] - 0x10;
		targofs += 6 + *(int32_t*)&inp[2];
		break;
	case 0xeb:	// 8-bit JMP
	case 0xe0:	// 8-bit LOOP
	case 0xe1:
	case 0xe2:
	default:	// 8-bit Jcc
		// Already a 2-byte form; reuse the opcode unchanged.
		targofs += 2 + (int32_t)(int8_t)inp[1];
		break;
	}
	assert(targofs <= f->insn[f->ninsn-1].srcofs);

	// Find the target in the insn table (binary search for the
	// greatest srcofs <= targofs; pass 2 guaranteed an exact hit).
	unsigned lo = 0;
	unsigned hi = f->ninsn-1;
	while (hi > lo) {
		unsigned mid = (lo + hi + 1) / 2;
		unsigned midofs = f->insn[mid].srcofs;
		if (targofs >= midofs)
			lo = mid;
		else
			hi = mid - 1;
	}
	assert(targofs == f->insn[lo].srcofs);

	// Emit the 2-byte jump instruction (3 bytes with prediction hint)
	outp[0] = opcode;
	outp[1] = (int)f->insn[lo].dstofs - ((int)f->insn[ino].dstofs+outlen);
}
   1444 
   1445 // Emit an indirect jump/call/ret instruction.
   1446 // NB. vxemu_sighandler (sig.c) knows that ebx is saved as
   1447 // the first instruction and then trashed.  
   1448 // NB. vxemu_sighandler knows that the immediate count 
   1449 // in a return immediate instruction is at offset 10.
   1450 // NB. vxemu_sighandler knows that in an indirect call:
   1451 //	* the stack is unchanged until offset -5 (from the end)
   1452 //	* at offset -5, the return address has been pushed
   1453 //	  and the target eip is in ebx.
// Emit the translation of an indirect control transfer (indirect
// jump/call, or return).  The guest target EIP is loaded into ebx
// (after saving the real ebx into the emu state), then control goes to
// vxrun_lookup_indirect, which maps the guest EIP to translated code.
static inline void xemit_indir(struct vxproc *p, int itype, unsigned ino)
{
	unsigned i;
	extern void vxrun_lookup_indirect();

	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;
	unsigned srcofs = f->insn[ino].srcofs;
	uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs;
	uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;
	uint8_t *outp0 = outp;

	// Common: movl %ebx,VSEG:VXEMU_EBX
	outp[0] = VSEGPREFIX;		// Appropriate segment override
	outp[1] = 0x89;
	outp[2] = 0x1d;
	*(uint32_t*)&outp[3] = offsetof(vxemu, cpu.reg[EBX]);
	outp += 7;

	// Instruction-specific code
	switch (itype) {
	default:
		assert(0);

	case VXI_CALLIND:
		assert(inp[0] == 0xff);
		assert(EA_REG(inp[1]) == 2);	// /2 = indirect CALL
		goto Common;

	case VXI_JUMPIND:
		assert(inp[0] == 0xff);
		assert(EA_REG(inp[1]) == 4);	// /4 = indirect JMP
	Common:;
		unsigned srclen = xscan_rm(inp+1) - inp;
		// Reuse the guest's addressing mode: keep the mod and r/m
		// fields of the ModRM byte but substitute EBX as the
		// destination register, then copy any SIB/displacement
		// bytes verbatim.
		outp[0] = 0x8b;		// movl <indirect_ea>,%ebx
		outp[1] = (inp[1] & 0xc7) | (EBX << 3);
		for (i = 2; i < srclen; i++)
			outp[i] = inp[i];
		outp += srclen;
		
		if(itype == VXI_CALLIND) {
			outp[0] = 0x68;		// pushl $<return_eip>
			*(uint32_t*)&outp[1] = emu->cpu.eip + srcofs + srclen;
			outp += 5;
		}
		break;

	case VXI_RETURN:
		assert(inp[0] == 0xc3);
		*outp++ = 0x5b;		// popl %ebx
		break;
	
	case VXI_RETURN_IMM:
		assert(inp[0] == 0xc2);
		// RET imm16: pop the return EIP, then discard imm16 bytes
		// of arguments (16-bit immediate widened to a 32-bit add).
		outp[0] = 0x5b;		// popl %ebx
		outp[1] = 0x81;		// add $<spc>,%esp
		outp[2] = 0xc4;
		*(uint32_t*)&outp[3] = *(uint16_t*)&inp[1];
		outp += 1+6;
		break;
	}

	// Common: jmp vxrun_lookup_indirect
	outp[0] = 0xe9;
	*(uint32_t*)&outp[1] = (uint32_t)(intptr_t)vxrun_lookup_indirect -
				(uint32_t)(intptr_t)&outp[5];
	outp += 5;
	assert(outp - outp0 == f->insn[ino].dstlen);
}
   1523 
   1524 // NB. vxemu_sighandler (sig.c) knows that eax is saved as
   1525 // the first instruction and then trashed.
// Emit the fixed 27-byte (6+5+11+5) sequence for a trapping instruction:
// save eax, load the trap number into eax, record the guest EIP in the
// emu state, and jump to vxrun_gentrap.
static void xemit_trap(struct vxproc *p, int ino)
{
	extern void vxrun_gentrap();

	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;

	// Trapping instruction.  Determine the trap type.
	uint32_t trapno;
	uint32_t trapeip = emu->cpu.eip + f->insn[ino].srcofs;
	uint8_t *inp = (uint8_t*)emu->mem->base + trapeip;
	switch (inp[0]) {
	case 0xcc:	// Breakpoint
		trapno = VXTRAP_BREAKPOINT;
		trapeip++;	// EIP points after insn
		break;
	case 0xcd:	// INT $n
		// Soft-interrupt traps encode the interrupt vector.
		trapno = VXTRAP_SOFT + inp[1];
		trapeip += 2;	// EIP points after insn
		break;
	case 0x0f:
		if (inp[1] == 0x05) {	// SYSCALL instruction
			trapno = VXTRAP_SYSCALL;
			trapeip += 2;	// EIP points after insn
			break;
		}
		// fall thru...
	default:	// Invalid instruction
		// EIP is left pointing AT the faulting instruction.
		trapno = VXTRAP_INVALID;
		break;
	}

	// Emit the output code sequence.
	uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs;

	// movl %eax,VSEG:VXEMU_EAX
	outp[0] = VSEGPREFIX;
	outp[1] = 0xa3;
	*(uint32_t*)&outp[2] = offsetof(vxemu, cpu.reg[EAX]);

	// movl $trapno,%eax
	outp[6+0] = 0xb8;
	*(uint32_t*)&outp[6+1] = trapno;

	// movl $trapeip,VSEG:VXEMU_EIP
	outp[6+5+0] = VSEGPREFIX;
	outp[6+5+1] = 0xc7;
	outp[6+5+2] = 0x05;
	*(uint32_t*)&outp[6+5+3] = offsetof(vxemu, cpu.eip);
	*(uint32_t*)&outp[6+5+7] = trapeip;

	// jmp vxrun_gentrap
	outp[6+5+11+0] = 0xe9;
	*(uint32_t*)&outp[6+5+11+1] = (uint32_t)(intptr_t)vxrun_gentrap -
					(uint32_t)(intptr_t)&outp[6+5+11+5];

	// Pass 1 reserved exactly this many bytes for a trap.
	assert(f->insn[ino].dstlen == 6+5+11+5);
}
   1584 
   1585 // Translation pass 4:
   1586 // Emit the translated instruction stream.
// Translation pass 4:
// Emit the translated instruction stream for the fragment being built,
// register the fragment in the code table, and add its entrypoint to
// the EIP hash table.
static void xemit(struct vxproc *p)
{
	unsigned i, j;
	struct vxemu *emu = p->emu;
	struct vxfrag *f = emu->txfrag;
	unsigned ninsn = f->ninsn;

	// Writing the instruction stream immediately after the insn table.
	uint8_t *outstart = FRAGCODE(f);

	// Write extra trampoline code after the already-arranged code.
	uint8_t *extra = outstart + (unsigned)f->insn[ninsn-1].dstofs
				+ (unsigned)f->insn[ninsn-1].dstlen;

	// First emit the prolog
	outstart[0] = VSEGPREFIX;			// Segment override
	outstart[1] = 0x8b; outstart[2] = 0x1d;		// movl <abs32>,%ebx
	*(uint32_t*)&outstart[3] = offsetof(vxemu, cpu.reg[EBX]);

	// Now emit the instructions
	asm volatile("cld");	// rep movsb below requires DF clear
	uint8_t *instart = (uint8_t*)emu->mem->base + emu->cpu.eip;
	for (i = 0; i < ninsn; ) {
		unsigned itype = f->insn[i].itype;

		switch (itype) {

		case VXI_NOTRANS:
			// Just copy strings of untranslated instructions.
			// Find the end of the run [i, j) first so one
			// rep movsb moves the whole stretch.
			for (j = i+1; j < ninsn; j++)
				if (f->insn[j].itype != VXI_NOTRANS)
					break;

			unsigned srcofs = f->insn[i].srcofs;
			unsigned dstofs = f->insn[i].dstofs;
			uint8_t *inp = instart + f->insn[i].srcofs;
			uint8_t *outp = outstart + f->insn[i].dstofs;
			unsigned cnt = f->insn[j].dstofs - dstofs;
			// Untranslated instructions keep their length, so
			// source and destination spans must match exactly.
			assert(cnt == f->insn[j].srcofs - srcofs);
			asm volatile("rep movsb"
				: : "c" (cnt), "S" (inp), "D" (outp));

			i = j;
			break;

		case VXI_CALL:
		case VXI_JUMP:
		case VXI_ENDFRAG:
		case VXI_LOOP:
		case VXI_LOOPZ:
		case VXI_LOOPNZ:
			// Direct transfers allocate trampoline space from
			// 'extra' and advance it.
			xemit_jump(p, itype, i++, &extra);
			break;

		case VXI_JUMP8:
			xemit_jump8(p, i++);
			break;

		case VXI_RETURN:
		case VXI_JUMPIND:
		case VXI_CALLIND:
			xemit_indir(p, itype, i++);
			break;

		case VXI_TRAP:
			xemit_trap(p, i++);
			break;

		default:
			assert(0);
		}
	}

	// Record the final amount of code table space we've consumed.
	emu->codefree = extra;

	// Add an entry to the code pointer table to the new fragment.
	// The code pointer table grows downward from the top of the
	// code area, toward the upward-growing code/trampolines.
	uint32_t *codetab = emu->codetab;
	*--codetab = (uint32_t)(intptr_t)f;
	emu->codetab = codetab;

	// The two regions must not have collided.
	assert((void*)extra < (void*)codetab);

	// Insert the new entrypoint into the hash table
	// (open addressing with linear probing).
	uint32_t idx = etabhash(emu->cpu.eip) & emu->etabmask;
	while (emu->etab[idx].srceip != NULLSRCEIP) {
		assert(emu->etab[idx].srceip != emu->cpu.eip);
		idx = (idx+1) & emu->etabmask;
	}
	emu->etab[idx].srceip = emu->cpu.eip;
	emu->etab[idx].dsteip = (uint32_t)(intptr_t)outstart;
	emu->etabcnt++;
	
	if (vx32_debugxlate) {
		// Temporarily restore flat segments so vxprint/disassemble
		// can run, then re-enter the vx32 segment setup.
		vxrun_cleanup(emu);
		vxprint("====== xlate\n");
		vxprint("-- guest\n");
		disassemble(emu->mem->base, emu->guestfrag, emu->guestfragend);
		vxprint("-- translation\n");
		disassemble(NULL, outstart, extra);
		vxprint("======\n");
		vxrun_setup(emu);
	}
}
   1691 
   1692 static int xlate(struct vxproc *vxp)
   1693 {
   1694 	// Pass 1: scan instruction stream, build preliminary vxinsn table
   1695 	int rc = xscan(vxp);
   1696 	if (rc != 0)
   1697 		return rc;
   1698 
   1699 	// Pass 2: simplify vxinsns wherever possible
   1700 	xsimp(vxp);
   1701 
   1702 	// Pass 3: compute final instruction placement and sizes
   1703 	xplace(vxp);
   1704 
   1705 	// Pass 4: emit translated instructions
   1706 	xemit(vxp);
   1707 
   1708 	return 0;
   1709 }
   1710 
   1711 #if 0
   1712 #include <asm/prctl.h>
   1713 #include <sys/prctl.h>
   1714 #endif
   1715 
// Debugging helper: print the current values of the data/stack segment
// registers, tagged with the caller-supplied prefix.  The disabled
// block additionally dumps the fs/gs base offsets via arch_prctl
// (Linux-only; see the #if 0'd includes above).
void dumpsegs(const char *prefix)
{
	uint16_t ds, es, fs, gs, ss;
	asm(	"movw %%ds,%0; movw %%es,%1; "
		"movw %%fs,%2; movw %%gs,%3; "
		"movw %%ss,%4"
		: "=rm"(ds), "=rm" (es), "=rm" (fs), "=rm" (gs), "=rm" (ss));
	vxprint("%s: ds=%04x es=%04x fs=%04x gs=%04x ss=%04x\n",
		prefix, ds, es, fs, gs, ss);
#if 0
	unsigned long fsofs, gsofs;
	arch_prctl(ARCH_GET_FS, (unsigned long)&fsofs);
	arch_prctl(ARCH_GET_GS, (unsigned long)&gsofs);
	vxprint("fsofs=%016lx gsofs=%016lx\n", fsofs, gsofs);
#endif
}
   1732 
// Run the guest process until it traps.  Maps the guest memory, arranges
// the exception-handling context, then repeatedly looks up (or creates,
// via xlate) the translated fragment for the current guest EIP and
// executes it.  Returns the trap code (a VXTRAP_* value left in
// emu->cpu_trap by the signal handler or translated code), or -1 if the
// guest memory could not be mapped.
int vxproc_run(struct vxproc *vxp)
{
	vxemu *emu = vxp->emu;
	vxmmap *mm;

	// Make sure the process is mapped into our host memory
	if ((mm = vxmem_map(vxp->mem, 0)) == NULL)
		return -1;
	if (vxemu_map(emu, mm) < 0) {
		vxmem_unmap(vxp->mem, mm);
		return -1;
	}
	emu->mem = mm;
	
	// Pending trap?
	if(emu->cpu_trap){
		assert(0);	// Can this even happen?
		int trap = emu->cpu_trap;
		emu->cpu_trap = 0;
		return trap;
	}
	
	uint16_t vs;
	// Registers can't be already loaded or we will smash
	// the "host segment registers" part of emu.
	asm("movw %"VSEGSTR",%0"
		: "=r" (vs));

	assert(vs != emu->emusel);

	// Save our stack environment for exception-handling.
	// This only saves the integer registers.  If the signal handler
	// happens in the middle of a translation involving floating-point
	// code, we need to make sure that when we jump back here in the
	// handler, we first restore the floating point registers to
	// the state they were in during the computation.  (Operating
	// systems typically save the FPU state, reset the FPU, and 
	// pass the saved state to the signal handler.)
	// The Linux signal handler does exactly this.
	//
	// On FreeBSD, after hours wasted trying to manually restore the
	// floating point state, I gave up.  Instead, the FreeBSD code
	// saves an mcontext_t here and then overwrites the signal handler's
	// mcontext_t with this one.  Then when it returns from the handler,
	// the OS will restore the floating point state and then the mcontext,
	// jumping back here with exactly the FPU state that we want.
	// Why not do this on Linux?  Because it didn't work when I tried it,
	// and I was not about to track down why.
	//
	// On OS X, there is no getcontext, so you'd think we'd be back to
	// the Linux approach of manual FPU restore + siglongjmp.
	// Unfortunately, OS X can't deal with siglongjmp from alternate
	// signal stacks.  If it invokes a signal handler on an alternate 
	// signal stack and that handler uses siglongjmp to go back to the
	// original stack instead of returning out of the handler, then
	// OS X thinks the code is still running on the alternate stack, 
	// which causes all sorts of problems.  Thus we have to do the
	// getcontext trick.  Besides, it is far easier to write a getcontext
	// routine--we already need to know the layout of mcontext_t to
	// write the signal handler--than to figure out what the FPU state
	// looks like.
	//
	// And you thought this was going to be easy.

	// Like setjmp, the context calls below "return" a second time,
	// with a nonzero indication, when the signal handler resumes us
	// after a guest trap; that path jumps to the trap-return code.
#if defined(__FreeBSD__)
	ucontext_t env;
	emu->trapenv = &env.uc_mcontext;
	volatile int n = 0;
	getcontext(&env);
	if(++n > 1){
#elif defined(__APPLE__)
	struct i386_thread_state env;
	emu->trapenv = &env;
	if(vx32_getcontext(&env)){
#else
	mcontext_t env;
	emu->trapenv = &env;
	if(vx32_getcontext(&env)){
#endif
		if(vx32_debugxlate) vxprint("VX trap %x err %x va %08x "
				"veip %08x veflags %08x\n",
				emu->cpu_trap, emu->cpu.traperr, emu->cpu.trapva,
				emu->cpu.eip, emu->cpu.eflags);
		goto trapped;
	}

	// Load our special vxproc segment selector into fs register.
	vxrun_setup(emu);

	while (1) {
		// Look up the translated entrypoint for the current vx32 EIP
		// (open-addressed hash table, linear probing).
		uint32_t eip = emu->cpu.eip;
		uint32_t idx = etabhash(eip) & emu->etabmask;
		while (emu->etab[idx].srceip != eip) {
			if (emu->etab[idx].srceip == NULLSRCEIP)
				goto notfound;
			idx = (idx+1) & emu->etabmask;
		}

		// Run the translated code fragment.
		// Return if the code terminated with an exception.
		// Otherwise it terminated because of an untranslated EIP,
		// so translate it.
		if(vxrun(emu, emu->etab[idx].dsteip) != 0)
			break;

	notfound:
		// Translate the code fragment the current emu->cpu.eip points to
		if(xlate(vxp) != 0)
			break;
	}

	// Restore the usual flat model data segment registers.
	vxrun_cleanup(emu);
	
trapped:
	// De-register our setjmp environment for trap handling.
	emu->trapenv = NULL;

	emu->mem = NULL;
	int trap = emu->cpu_trap;
	emu->cpu_trap = 0;
	return trap;
}
   1857 
   1858 void vxemu_stats(struct vxproc *p)
   1859 {
   1860 	unsigned i;
   1861 	vxemu *emu = p->emu;
   1862 
   1863 	vxprint("flush count: %llu\n", nflush);
   1864 
   1865 //	vxprint("vxproc size %dKB\n", p->size/1024);
   1866 
   1867 	unsigned coll = 0;
   1868 	for (i = 0; i < emu->etablen; i++) {
   1869 		vxentry *e = &emu->etab[i];
   1870 		if (e->srceip == NULLSRCEIP)
   1871 			continue;
   1872 		unsigned idx = etabhash(e->srceip) & emu->etabmask;
   1873 		if (idx != i) {
   1874 		//	vxprint("srcip %08x hash %d actually at %d\n",
   1875 		//		e->srceip, idx, i);
   1876 			coll++;
   1877 		}
   1878 	}
   1879 	vxprint("entry tab: %d used, %d total, %d collisions\n",
   1880 		emu->etabcnt, emu->etablen, coll);
   1881 }
   1882 
   1883 static void disassemble(uint8_t *addr0, uint8_t *p, uint8_t *ep)
   1884 {
   1885 	xdinst i;
   1886 	int j;
   1887 	uint8_t *q;
   1888 	char buf[128];
   1889 
   1890 	for (; p < ep; p = q) {
   1891 		if ((q = x86decode(addr0, p, &i)) == NULL)
   1892 			break;
   1893 		x86print(buf, sizeof buf, &i);
   1894 		vxprint("%08x", i.addr);
   1895 		for(j=0; j<i.len; j++)
   1896 			vxprint(" %02x", p[j]);
   1897 		for(; j<10; j++)
   1898 			vxprint("   ");
   1899 		vxprint(" %s\n", buf);
   1900 	}
   1901 }
   1902 
   1903 void vxprint(char *fmt, ...)
   1904 {
   1905 	va_list arg;
   1906 	char buf[512];
   1907 	
   1908 	va_start(arg, fmt);
   1909 	vsnprintf(buf, sizeof buf, fmt, arg);
   1910 	va_end(arg);
   1911 	USED(write(2, buf, strlen(buf)));
   1912 }
   1913