initial commit

This commit is contained in:
Rebecca Doth 2023-11-12 03:13:43 +01:00
commit bc8f6e44f4
4 changed files with 309 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
listing*
t1*
t2*
8086d

27
Makefile Normal file
View File

@ -0,0 +1,27 @@
# Build and round-trip test rules for the 8086 disassembler.
#
#   default   compile $(INF) into $(OUT)
#   testdata  assemble the reference listings with nasm
#   test      disassemble the reference binaries, re-assemble the output
#             and compare it byte-for-byte with the reference binaries
#   clean     remove everything the rules above generate
WRN := -Wall -Wextra
INF := main.c
OUT := 8086d
ASM-INF1 := listing_0037_single_register_mov.asm
ASM-INF2 := listing_0038_many_register_mov.asm
ASM-OUT1 := listing_0037_single_register_mov
ASM-OUT2 := listing_0038_many_register_mov

# None of these targets names a file it produces; declaring them phony keeps
# them working even if a file called "test", "clean", ... appears in the tree.
.PHONY: default testdata test clean

default:
	$(CC) $(WRN) $(INF) -o $(OUT)
testdata:
	nasm $(ASM-INF1) -o $(ASM-OUT1)
	nasm $(ASM-INF2) -o $(ASM-OUT2)
test: clean testdata default
	./$(OUT) $(ASM-OUT1) > t1.S
	./$(OUT) $(ASM-OUT2) > t2.S
	nasm t1.S -o t1
	nasm t2.S -o t2
	diff t1 $(ASM-OUT1)
	diff t2 $(ASM-OUT2)
clean:
	rm -f t1.S t2.S t1 t2 $(OUT) $(ASM-OUT1) $(ASM-OUT2)

3
README.md Normal file
View File

@ -0,0 +1,3 @@
The tests are course material; although public, they have been omitted.
The program in its current state should be able to decode any combination of 8086 register-to-register mov. Only that much for now.

274
main.c Normal file
View File

@ -0,0 +1,274 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Size in bytes of the buffers print_instructions uses for register names. */
#define REGSTRLEN 8
/* Size in bytes of the buffer print_instructions uses for the mnemonic. */
#define INSTSTRLEN 32
/*
 * Operations the decoder knows about.
 *
 * A non-zero value is the 6-bit opcode pattern that parse_instructions
 * compares against the first instruction byte shifted right by two.
 * UNDEFINED marks an instruction slot that could not be decoded.
 */
typedef enum {
    UNDEFINED = 0x00,
    MOV       = 0x22    /* binary 100010 */
} OpCodes;
/*
 * 3-bit register codes.
 *
 * Every code has two names: which one applies is decided by the `wide`
 * flag that accompanies the value (see match_regs and regtostr), so the
 * 16-bit and the 8-bit names deliberately share the same numeric values.
 */
typedef enum {
    /* names used when the operation is wide (16-bit) */
    AX = 0,
    CX = 1,
    DX = 2,
    BX = 3,
    SP = 4,
    BP = 5,
    SI = 6,
    DI = 7,

    /* names used when the operation is not wide (8-bit) */
    AL = 0,
    CL = 1,
    DL = 2,
    BL = 3,
    AH = 4,
    CH = 5,
    DH = 6,
    BH = 7
} Registers;
/* One decoded instruction, as filled in by parse_instructions. */
typedef struct {
/* Operation; UNDEFINED when the bytes were not recognised. */
OpCodes OpCode;
/* Destination register code. */
Registers Dst;
/* Source register code. */
Registers Src;
/* Non-zero when Dst/Src are to be printed with their 16-bit names. */
_Bool wide;
} Instruction;
/*
 * Map a register code to its Registers enumerator.
 *
 * byte  register code; values 0..7 are meaningful
 * wide  non-zero selects the 16-bit name set, zero the 8-bit one
 *
 * Returns the enumerator for the code, or AX / AL (depending on `wide`)
 * for any value outside 0..7.
 */
Registers match_regs(char byte, _Bool wide)
{
    /* Both tables are indexed by the register code. */
    static const Registers word_regs[8] = { AX, CX, DX, BX, SP, BP, SI, DI };
    static const Registers byte_regs[8] = { AL, CL, DL, BL, AH, CH, DH, BH };
    const int code = byte;

    if(code < 0 || code > 7)
    {
        return wide ? AX : AL;
    }
    return wide ? word_regs[code] : byte_regs[code];
}
/*
 * Write the mnemonic for `opcode` into `instStr` as a NUL-terminated string.
 *
 * opcode   operation to name
 * instStr  destination buffer; must hold at least 4 bytes
 *
 * Unknown opcodes (including UNDEFINED) produce an empty string. The
 * terminator is always written, so the result is a valid C string even
 * when the caller did not zero the buffer beforehand.
 */
void insttostr(OpCodes opcode, char* instStr)
{
    switch(opcode)
    {
        case MOV:
            /* sizeof includes the terminating NUL */
            memcpy(instStr, "mov", sizeof "mov");
            break;
        case UNDEFINED:
        default:
            instStr[0] = '\0';
            break;
    }
}
/*
 * Write the assembler name of register `reg` into `regStr` as a
 * NUL-terminated string.
 *
 * reg     register code
 * regStr  destination buffer; must hold at least 3 bytes
 * wide    non-zero selects the 16-bit names (ax, cx, ...), zero the
 *         8-bit names (al, cl, ...)
 *
 * A code outside 0..7 is printed as "ax" / "al". The terminator is always
 * written, so the result is a valid C string even when the caller did not
 * zero the buffer beforehand.
 */
void regtostr(Registers reg, char* regStr, _Bool wide)
{
    /* Two characters plus the terminating NUL per entry. */
    static const char wordNames[8][3] = {
        [AX] = "ax", [CX] = "cx", [DX] = "dx", [BX] = "bx",
        [SP] = "sp", [BP] = "bp", [SI] = "si", [DI] = "di"
    };
    static const char byteNames[8][3] = {
        [AL] = "al", [CL] = "cl", [DL] = "dl", [BL] = "bl",
        [AH] = "ah", [CH] = "ch", [DH] = "dh", [BH] = "bh"
    };
    /* The unsigned cast also sends negative values to the fallback slot 0. */
    const unsigned idx = ((unsigned)reg < 8u) ? (unsigned)reg : 0u;

    memcpy(regStr, wide ? wordNames[idx] : byteNames[idx], sizeof wordNames[0]);
}
/*
 * Print a listing for `ninst` decoded instructions on stdout.
 *
 * The listing starts with a comment naming `filename` and a "bits 16"
 * directive, followed by one line per instruction and a final empty line.
 * Slots whose OpCode is UNDEFINED are printed as a comment line.
 */
void print_instructions(Instruction* instructions, size_t ninst, char* filename)
{
    printf("; disassembly for file %s\nbits 16\n\n", filename);

    const Instruction* const end = instructions + ninst;
    for(const Instruction* cur = instructions; cur != end; ++cur)
    {
        if(cur->OpCode == UNDEFINED)
        {
            puts("; UNDEFINED INSTRUCTION");
            continue;
        }

        /* Fully zeroed so the text is terminated whatever the helpers write. */
        char mnemonic[INSTSTRLEN] = "";
        char dstName[REGSTRLEN] = "";
        char srcName[REGSTRLEN] = "";

        insttostr(cur->OpCode, mnemonic);
        regtostr(cur->Dst, dstName, cur->wide);
        regtostr(cur->Src, srcName, cur->wide);
        printf("%s %s, %s\n", mnemonic, dstName, srcName);
    }

    fputc('\n', stdout);
}
/*
 * Decode a raw byte stream into `instructions`.
 *
 * bytes         input bytes
 * nbytes        number of valid bytes in `bytes`
 * instructions  output array with room for at least nbytes / 2 entries
 *
 * Every instruction slot consumes exactly two bytes, so nbytes / 2 slots
 * are written. A trailing odd byte is ignored: it cannot form a complete
 * instruction, and decoding it would read past `bytes` and write past the
 * nbytes / 2 entries the caller provides.
 *
 * Only register-to-register MOV (opcode 100010dw, MOD = 11) is decoded.
 * Any other byte pair, including MOV with a memory operand, is stored as
 * UNDEFINED. All fields of every slot are written, so the output array
 * does not have to be zeroed beforehand.
 */
void parse_instructions(unsigned char* bytes, size_t nbytes, Instruction* instructions)
{
    const size_t ninst = nbytes / 2;

    for(size_t iindx = 0; iindx < ninst; ++iindx)
    {
        const unsigned char first  = bytes[2 * iindx];     /* opcode, D, W   */
        const unsigned char second = bytes[2 * iindx + 1]; /* MOD, REG, R/M  */
        Instruction* inst = &instructions[iindx];

        inst->OpCode = UNDEFINED;
        inst->Dst = AX;
        inst->Src = AX;
        inst->wide = 0;

        if((first >> 2) != MOV)
        {
            continue;
        }
        if((second & 0b11000000) != 0b11000000)
        {
            /* memory operand forms are not supported yet */
            continue;
        }

        inst->OpCode = MOV;
        inst->wide = (first & 0b1) != 0;

        const Registers rm  = match_regs((char)(second & 0b111), inst->wide);
        const Registers reg = match_regs((char)((second >> 3) & 0b111), inst->wide);

        /* D bit: 1 means the REG field is the destination, 0 the source. */
        if(first & 0b10)
        {
            inst->Dst = reg;
            inst->Src = rm;
        } else
        {
            inst->Dst = rm;
            inst->Src = reg;
        }
    }
}
/*
 * Disassemble the file at `path` and print the listing on stdout.
 *
 * Returns 0 on success and -1 on failure; on failure a diagnostic has
 * already been written to stderr. Every resource acquired here is
 * released before returning.
 */
static int disassemble_file(char* path)
{
    int rc = -1;
    long fsize = 0;
    size_t bytes_read = 0;
    size_t ninst = 0;
    unsigned char* bytes = NULL;
    Instruction* instructions = NULL;

    /* "rb": the input is machine code, so no text-mode translation. */
    FILE* f = fopen(path, "rb");
    if(f == NULL)
    {
        perror(path);
        return rc;
    }

    if(fseek(f, 0, SEEK_END) != 0)
    {
        perror(path);
        goto done;
    }
    fsize = ftell(f);
    if(fsize < 0)
    {
        perror(path);
        goto done;
    }
    if(fsize < 2)
    {
        fprintf(stderr, "%s: file too small\n", path);
        goto done;
    }
    rewind(f);

    bytes = calloc((size_t)fsize, 1);
    if(bytes == NULL)
    {
        perror(path);
        goto done;
    }

    bytes_read = fread(bytes, 1, (size_t)fsize, f);
    if(ferror(f))
    {
        perror(path);
        goto done;
    }

    /* parse_instructions consumes two bytes per instruction slot. */
    ninst = bytes_read / 2;
    if(ninst == 0)
    {
        fprintf(stderr, "%s: file too small\n", path);
        goto done;
    }

    instructions = calloc(ninst, sizeof *instructions);
    if(instructions == NULL)
    {
        perror(path);
        goto done;
    }

    /* Hand over whole byte pairs only, so the parser never touches memory
       beyond `bytes` or beyond the `ninst` slots allocated above. */
    parse_instructions(bytes, ninst * 2, instructions);
    print_instructions(instructions, ninst, path);
    rc = 0;

done:
    free(instructions);
    free(bytes);
    fclose(f);
    return rc;
}

/*
 * Entry point: disassemble every file named on the command line, in order.
 *
 * Returns -1 when no file was given. Otherwise returns 0; a file that
 * cannot be processed is reported on stderr and skipped, and does not
 * change the exit status.
 */
int main(int argc, char** argv)
{
    if(argc < 2)
    {
        fputs("No input provided\n", stderr);
        return -1;
    }

    for(int argi = 1; argi < argc; ++argi)
    {
        /* The helper has already reported any failure; keep going. */
        (void)disassemble_file(argv[argi]);
    }
    return 0;
}