This commit is contained in:
Rebecca Doth 2024-04-16 17:50:55 +02:00
parent 15133290e3
commit 7c3411d826

295
main.c
View file

@ -2,14 +2,23 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
// Capacity, in bytes, of the zero-filled buffers that regtostr() writes a register name into.
#define REGSTRLEN 8
// Capacity, in bytes, of the zero-filled buffer that insttostr() writes a mnemonic into.
#define INSTSTRLEN 32
// Opcode values as obtained from the upper six bits of an instruction's
// first byte (the decoder computes them as `byte >> 2`).
typedef enum {
    UNDEFINED = 0,        // no recognised opcode
    MOV_RM    = 0b100010  // MOV, register/memory form
} OpCodes;
// Values of the 3-bit R/M field when it describes a memory operand.
// Each entry is annotated with the text eactostr() associates with it.
typedef enum {
    BX_SI = 0b000,  // bx + si
    BX_DI = 0b001,  // bx + di
    BP_SI = 0b010,  // bp + si
    BP_DI = 0b011,  // bp + di
    SI___ = 0b100,  // si
    DI___ = 0b101,  // di
    BP___ = 0b110,  // bp
    Di_Ad = BP___,  // same bit pattern as BP___; the decoder compares R/M against it
                    // when MOD is 00 to select a direct address instead of a base register
    BX___ = 0b111   // bx
} EffectiveAddress;
typedef enum { typedef enum {
AX = 0b000, AX = 0b000,
AL = 0b000, AL = 0b000,
@ -34,58 +43,6 @@ typedef enum {
DI = 0b111 DI = 0b111
} Registers; } Registers;
// One decoded instruction, as filled in by parse_instructions() and
// consumed by the Instruction-based print_instructions().
typedef struct {
OpCodes OpCode; // taken from the first instruction byte shifted right by two; UNDEFINED when unrecognised
Registers Dst; // destination operand, chosen from the REG or R/M field according to the direction bit
Registers Src; // source operand, the other of the two fields
_Bool wide; // lowest bit of the first instruction byte; selects which register-name set regtostr() uses
} Instruction;
// Maps a register field value onto the corresponding Registers enumerator.
//
// byte - field value to look up (callers pass a value masked to three bits)
// wide - non-zero selects the AX..DI set of names, zero the AL..BH set
//
// A value that matches none of the listed registers yields AX when `wide`
// is set and AL otherwise.
Registers match_regs(char byte, _Bool wide)
{
    if(!wide)
    {
        switch(byte)
        {
            case CL: return CL;
            case DL: return DL;
            case BL: return BL;
            case AH: return AH;
            case CH: return CH;
            case DH: return DH;
            case BH: return BH;
            default: return AL; // AL and anything unrecognised
        }
    }

    switch(byte)
    {
        case CX: return CX;
        case DX: return DX;
        case BX: return BX;
        case SP: return SP;
        case BP: return BP;
        case SI: return SI;
        case DI: return DI;
        default: return AX; // AX and anything unrecognised
    }
}
void insttostr(OpCodes opcode, char* instStr) void insttostr(OpCodes opcode, char* instStr)
{ {
switch(opcode) switch(opcode)
@ -100,36 +57,35 @@ void insttostr(OpCodes opcode, char* instStr)
} }
} }
// Writes the two-letter name of a register into `str`.
//
// reg  - register to name
// str  - destination buffer of at least 3 bytes; receives the name followed
//        by a NUL terminator
// wide - non-zero selects the ax..di names, zero the al..bh names
//
// If `reg` matches none of the registers for the selected set, `str` is left
// untouched.
void regtostr(Registers reg, char* str, _Bool wide)
{
    const char* name = NULL;

    if(wide)
    {
        switch(reg)
        {
            case AX: name = "ax"; break;
            case CX: name = "cx"; break;
            case DX: name = "dx"; break;
            case BX: name = "bx"; break;
            case SP: name = "sp"; break;
            case BP: name = "bp"; break;
            case SI: name = "si"; break;
            case DI: name = "di"; break;
            default: break;
        }
    } else
    {
        switch(reg)
        {
            case AL: name = "al"; break;
            case CL: name = "cl"; break;
            case DL: name = "dl"; break;
            case BL: name = "bl"; break;
            case AH: name = "ah"; break;
            case CH: name = "ch"; break;
            case DH: name = "dh"; break;
            case BH: name = "bh"; break;
            default: break;
        }
    }

    if(name != NULL)
    {
        // Copy the terminator as well so the result does not depend on the
        // caller having zero-filled the buffer beforehand.
        memcpy(str, name, 3);
    }
}
void print_instructions(Instruction* instructions, size_t ninst, char* filename) void eactostr(EffectiveAddress ea, char* str, unsigned short displacement, _Bool displace, _Bool directaddress)
{ {
str[0] = '[';
off_t offset = 1;
printf("; disassembly for file %s\nbits 16\n\n", filename); if(!directaddress)
for(size_t i = 0; i < ninst; ++i)
{ {
if(instructions[i].OpCode != UNDEFINED) switch(ea)
{ {
char inst[INSTSTRLEN] = { '\0' }; case BX_SI:
char dst[REGSTRLEN] = { '\0' }; memcpy(str + offset + 1, "bx + si", 7);
char src[REGSTRLEN] = { '\0' }; offset += 7;
break;
insttostr(instructions[i].OpCode, inst); case BX_DI:
regtostr(instructions[i].Dst, dst, instructions[i].wide); memcpy(str + offset + 1, "bx + di", 7);
regtostr(instructions[i].Src, src, instructions[i].wide); offset += 7;
break;
printf("%s %s, %s\n", inst, dst, src); case BP_SI:
} else memcpy(str + offset + 1, "bp + si", 7);
{ offset += 7;
puts("; UNDEFINED INSTRUCTION"); break;
case BP_DI:
memcpy(str + offset + 1, "bp + di", 7);
offset += 7;
break;
case SI___:
memcpy(str + offset + 1, "si", 2);
offset += 2;
break;
case DI___:
memcpy(str + offset + 1, "di", 2);
offset += 2;
break;
case BP___:
memcpy(str + offset + 1, "bp", 2);
offset += 2;
break;
case BX___:
memcpy(str + offset + 1, "bx", 2);
offset += 2;
break;
} }
} }
putc('\n', stdout); if(displace)
{
if(!directaddress)
{
memcpy(str + offset, " + ", 3);
offset += 3;
}
offset += snprintf(str + offset + 1, 6, "%hu", displacement);
}
str[offset + 1] = ']';
} }
void parse_instructions(unsigned char* bytes, size_t nbytes, Instruction* instructions) void print_instructions(unsigned char* bytes, size_t nbytes)
{ {
size_t bytes_used = 0; size_t bytes_used = 0;
for(size_t i = 0, iindx = 0; i < nbytes; i += bytes_used, ++iindx) for(size_t i = 0, iindx = 0; i < nbytes; i += bytes_used, ++iindx)
{ {
bytes_used = 0; bytes_used = 0;
instructions[iindx].OpCode = (OpCodes)(bytes[i] >> 2); OpCodes opcode = (OpCodes)(bytes[i] >> 2);
_Bool direction = (bytes[i] >> 1) & 0b1;
_Bool wide = bytes[i] & 0b1;
unsigned short displacement;
++bytes_used;
switch(instructions[iindx].OpCode) char inststr[16] = { '\0' };
char srcostr[16] = { '\0' };
char dstostr[16] = { '\0' };
char tmp1, tmp2;
switch(opcode)
{ {
case MOV_RM: case MOV_RM: // Register/Memory
bytes_used += 2;
switch(bytes[i + 1] >> 6) // MOD field switch(bytes[i + 1] >> 6) // MOD field
{ {
//case 0b11: case 0b00: // register to memory, no disp, 16-bit disp if R\M = 110
default: tmp1 = bytes[i + 1] & 0b111; // R/M field
instructions[iindx].wide = bytes[i] & 0b1; tmp2 = (bytes[i + 1] >> 3) & 0b111; // REG field
Registers tmp1 = match_regs((bytes[i + 1] >> 3) & 0b111, instructions[iindx].wide); // REG field ++bytes_used;
Registers tmp2 = match_regs( bytes[i + 1] & 0b111, instructions[iindx].wide); // R/M field
// NOTE: technically i could avoid using tmp1/2 by moving the match_regs inside the if statement but if(!direction)
// CONT: i didnt feel like repeating code, in case a future redesign may be neccessery.
if((bytes[i] >> 0b1) & 0b1) // direction bit
{ {
instructions[iindx].Src = tmp2; insttostr(opcode, inststr);
instructions[iindx].Dst = tmp1; regtostr(tmp2, srcostr, wide);
if(tmp1 == Di_Ad)
{
displacement = bytes[i + 2] ^ (bytes[i + 3] << 4);
eactostr(tmp1, dstostr, displacement, 1, 1);
} else {
eactostr(tmp1, dstostr, 0, 0, 0);
}
} else { } else {
instructions[iindx].Src = tmp1; insttostr(opcode, inststr);
instructions[iindx].Dst = tmp2; regtostr(tmp2, dstostr, wide);
if(tmp1 == Di_Ad)
{
displacement = bytes[i + 2] ^ (bytes[i + 3] << 4);
eactostr(tmp1, srcostr, displacement, 1, 1);
} else {
eactostr(tmp1, srcostr, 0, 0, 0);
}
} }
break; break;
case 0b01: // register to memory, 8-bit disp
tmp1 = bytes[i + 1] & 0b111; // R/M field
tmp2 = (bytes[i + 1] >> 3) & 0b111; // REG field
displacement = bytes[i + 2];
bytes_used += 2;
if(!direction)
{
insttostr(opcode, inststr);
regtostr(tmp2, srcostr, wide);
eactostr(tmp1, dstostr, displacement, 1, 0);
} else {
insttostr(opcode, inststr);
regtostr(tmp2, dstostr, wide);
eactostr(tmp1, srcostr, displacement, 1, 0);
}
break;
case 0b10: // register to memory, 16-bit disp
tmp1 = bytes[i + 1] & 0b111; // R/M field
tmp2 = (bytes[i + 1] >> 3) & 0b111; // REG field
displacement = bytes[i + 2] ^ (bytes[i + 3] << 4); // get displacement bytes
bytes_used += 3;
if(!direction)
{
insttostr(opcode, inststr);
regtostr(tmp2, srcostr, wide);
eactostr(tmp1, dstostr, displacement, 1, 0);
} else {
insttostr(opcode, inststr);
regtostr(tmp2, dstostr, wide);
eactostr(tmp1, srcostr, displacement, 1, 0);
}
break;
//case 0b11:
default: // register to register
tmp1 =(bytes[i + 1] >> 3) & 0b111; // REG field
tmp2 = bytes[i + 1] & 0b111; // R/M field
if(!direction) // 0 = REG is source
{
insttostr(opcode, inststr);
regtostr(tmp1, srcostr, wide);
regtostr(tmp2, dstostr, wide);
} else {
insttostr(opcode, inststr);
regtostr(tmp2, srcostr, wide);
regtostr(tmp1, dstostr, wide);
}
printf("%s %s, %s\n", inststr, dstostr, srcostr);
break;
} }
break; break;
default: default:
instructions[iindx].OpCode = UNDEFINED;
++bytes_used;
break; break;
} }
} }
@ -260,16 +322,15 @@ int main(int argc, char** argv)
} }
rewind(f); rewind(f);
unsigned char* bytes = calloc(1, fsize); unsigned char* bytes = calloc(1, fsize); // TODO: check if allocation failed
const size_t bytes_read = fread(bytes, 1, fsize, f); const size_t bytes_read = fread(bytes, 1, fsize, f);
Instruction* instructions = calloc(sizeof(Instruction), bytes_read / 2); printf("; disassembly for file %s\nbits 16\n\n", argv[argi]);
parse_instructions(bytes, bytes_read, instructions); print_instructions(bytes, bytes_read);
print_instructions(instructions, bytes_read / 2, argv[argi]); printf("\n");
free(instructions);
free(bytes); free(bytes);
LOOP_END_NOFREE_CLOSE: LOOP_END_NOFREE_CLOSE:
fclose(f); fclose(f);