From a18f250256b7e8e6c0e98402de00d9bfdf56ce25 Mon Sep 17 00:00:00 2001 From: "yu.dongliang" <18588496441@163.com> Date: Fri, 2 Jan 2026 19:13:30 +0800 Subject: [PATCH] add a simple assembler for x64 ASM --- asm/1.s | 13 + asm/Makefile | 53 ++++ asm/main.c | 122 ++++++++ asm/scf_asm.c | 424 ++++++++++++++++++++++++++ asm/scf_asm.h | 87 ++++++ asm/scf_dfa_asm.c | 98 ++++++ asm/scf_dfa_inst.c | 532 +++++++++++++++++++++++++++++++++ core/scf_lex_word.h | 15 + docs/Naja_sys.txt | 5 +- elf/scf_elf.h | 3 +- lex/scf_lex.c | 59 +++- lex/scf_lex.h | 9 +- lex/scf_lex_test.c | 4 +- lex/scf_lex_util.c | 58 +++- native/scf_instruction.c | 76 +++++ native/scf_instruction.h | 113 +++++++ native/scf_native.c | 47 --- native/scf_native.h | 98 +----- native/x64/scf_x64.h | 64 +--- native/x64/scf_x64_inst_util.c | 124 ++++++++ native/x64/scf_x64_opcode.c | 34 ++- native/x64/scf_x64_opcode.h | 87 +++++- native/x64/scf_x64_reg.c | 166 +--------- native/x64/scf_x64_reg.h | 81 +---- native/x64/scf_x64_reg_util.c | 166 ++++++++++ native/x64/scf_x64_reg_util.h | 85 ++++++ native/x64/scf_x64_util.h | 1 - parse/Makefile | 2 + parse/main.c | 65 ++-- parse/scf_dfa.c | 29 +- parse/scf_dfa.h | 20 +- parse/scf_dfa_parse.c | 68 ++--- parse/scf_parse.c | 154 ++-------- parse/scf_symtab.c | 100 +++++++ parse/scf_symtab.h | 37 +++ util/scf_string.h | 2 +- 36 files changed, 2364 insertions(+), 737 deletions(-) create mode 100644 asm/1.s create mode 100644 asm/Makefile create mode 100644 asm/main.c create mode 100644 asm/scf_asm.c create mode 100644 asm/scf_asm.h create mode 100644 asm/scf_dfa_asm.c create mode 100644 asm/scf_dfa_inst.c create mode 100644 native/scf_instruction.c create mode 100644 native/scf_instruction.h create mode 100644 native/x64/scf_x64_reg_util.c create mode 100644 native/x64/scf_x64_reg_util.h create mode 100644 parse/scf_symtab.c create mode 100644 parse/scf_symtab.h diff --git a/asm/1.s b/asm/1.s new file mode 100644 index 0000000..c184d42 --- /dev/null +++ b/asm/1.s @@ -0,0 
+1,13 @@ +.text +.global main, printf + +main: + push %rbp + leaq hello, %rdi + xorq %rax, %rax + call printf + pop %rbp + ret + +.data +hello: .asciz "hello world\n" diff --git a/asm/Makefile b/asm/Makefile new file mode 100644 index 0000000..4610bf5 --- /dev/null +++ b/asm/Makefile @@ -0,0 +1,53 @@ +CFILES += ../util/scf_string.c +CFILES += ../lex/scf_lex.c +CFILES += ../lex/scf_lex_util.c + +CFILES += main.c +CFILES += scf_asm.c +CFILES += scf_dfa_asm.c +CFILES += scf_dfa_inst.c + +CFILES += ../core/scf_lex_word.c +CFILES += ../parse/scf_dfa.c +CFILES += ../parse/scf_symtab.c + +CFILES += ../native/scf_instruction.c +CFILES += ../native/x64/scf_x64_opcode.c +CFILES += ../native/x64/scf_x64_reg_util.c +CFILES += ../native/x64/scf_x64_inst_util.c + +CFILES += ../elf/scf_elf.c +CFILES += ../elf/scf_elf_link.c +CFILES += ../elf/scf_elf_native.c +CFILES += ../elf/scf_elf_native32.c +CFILES += ../elf/scf_elf_x64.c +CFILES += ../elf/scf_elf_x64_so.c +CFILES += ../elf/scf_elf_arm64.c +CFILES += ../elf/scf_elf_arm64_so.c +CFILES += ../elf/scf_elf_arm32.c +CFILES += ../elf/scf_elf_arm32_so.c +CFILES += ../elf/scf_elf_naja.c +CFILES += ../elf/scf_elf_naja_so.c +CFILES += ../elf/scf_dwarf.c +CFILES += ../elf/scf_dwarf_abbrev.c +CFILES += ../elf/scf_dwarf_info.c +CFILES += ../elf/scf_dwarf_line.c + +CFLAGS += -g +#CFLAGS += -Wall +CFLAGS += -I../util +CFLAGS += -I../lex +CFLAGS += -I../parse +CFLAGS += -I../core +CFLAGS += -I../elf +CFLAGS += -I../native +CFLAGS += -I../native/x64 +CFLAGS += -I../native/risc + +LDFLAGS += -ldl -lm + +all: + gcc $(CFLAGS) $(CFILES) $(LDFLAGS) -o sasm + +clean: + rm *.o diff --git a/asm/main.c b/asm/main.c new file mode 100644 index 0000000..bf01a06 --- /dev/null +++ b/asm/main.c @@ -0,0 +1,122 @@ +#include"scf_asm.h" + +void usage(char* path) +{ + fprintf(stderr, "Usage: %s [-a arch] [-s sysroot] [-o out] src0 [src1]\n\n", path); + fprintf(stderr, "-a: select cpu arch (x64, arm64, naja), default is x64\n"); + fprintf(stderr, "-s: sysroot dir, 
default is '../lib'\n"); +} + +int main(int argc, char* argv[]) +{ + if (argc < 2) { + usage(argv[0]); + return -EINVAL; + } + + scf_vector_t* srcs = scf_vector_alloc(); + + char* sysroot = "../lib"; + char* arch = "x64"; + char* out = NULL; + + int i; + for (i = 1; i < argc; i++) { + + if ('-' == argv[i][0]) { + + if ('a' == argv[i][1]) { + + if (++i >= argc) { + usage(argv[0]); + return -EINVAL; + } + + arch = argv[i]; + continue; + } + + if ('s' == argv[i][1]) { + + if (++i >= argc) { + usage(argv[0]); + return -EINVAL; + } + + sysroot = argv[i]; + continue; + } + + if ('o' == argv[i][1]) { + + if (++i >= argc) { + usage(argv[0]); + return -EINVAL; + } + + out = argv[i]; + continue; + } + + usage(argv[0]); + return -EINVAL; + } + + char* fname = argv[i]; + size_t len = strlen(fname); + + if (len < 3) { + fprintf(stderr, "file '%s' invalid\n", fname); + return -1; + } + + scf_logi("fname: %s\n", fname); + + scf_vector_t* vec; + + if (!strcmp(fname + len - 2, ".s") || !strcmp(fname + len - 2, ".S")) + vec = srcs; + else { + fprintf(stderr, "file '%s' invalid\n", fname); + return -1; + } + + scf_logi("fname: %s\n", fname); + if (scf_vector_add(vec, fname) < 0) + return -ENOMEM; + } + + printf("\n"); + + scf_asm_t* _asm = NULL; + + if (scf_asm_open(&_asm) < 0) { + scf_loge("\n"); + return -1; + } + + for (i = 0; i < srcs->size; i++) { + char* file = srcs->data[i]; + + assert(file); + + if (scf_asm_file(_asm, file) < 0) { + scf_loge("parse file '%s' failed\n", file); + return -1; + } + } + + char* obj = "1.o"; + if (out) + obj = out; + + if (scf_asm_to_obj(_asm, obj, arch) < 0) { + scf_loge("\n"); + return -1; + } + + scf_asm_close(_asm); + + printf("%s(),%d, main ok\n", __func__, __LINE__); + return 0; +} diff --git a/asm/scf_asm.c b/asm/scf_asm.c new file mode 100644 index 0000000..6ac4408 --- /dev/null +++ b/asm/scf_asm.c @@ -0,0 +1,424 @@ +#include"scf_asm.h" +#include"scf_symtab.h" + +int scf_asm_open(scf_asm_t** pasm) +{ + if (!pasm) + return -EINVAL; + + 
scf_asm_t* _asm = calloc(1, sizeof(scf_asm_t)); + if (!_asm) + return -ENOMEM; /* allocation failure, not an invalid argument */ + + _asm->text = scf_vector_alloc(); + if (!_asm->text) + goto text_error; + + _asm->data = scf_vector_alloc(); + if (!_asm->data) + goto data_error; + + _asm->text_relas = scf_vector_alloc(); + if (!_asm->text_relas) + goto text_rela_error; + + _asm->data_relas = scf_vector_alloc(); + if (!_asm->data_relas) + goto data_rela_error; + + _asm->global = scf_vector_alloc(); + if (!_asm->global) + goto global_error; + + _asm->labels = scf_vector_alloc(); + if (!_asm->labels) + goto labels_error; + + _asm->symtab = scf_vector_alloc(); + if (!_asm->symtab) + goto symtab_error; + + if (scf_asm_dfa_init(_asm) < 0) { + scf_loge("\n"); + goto dfa_error; + } + + *pasm = _asm; + return 0; + +dfa_error: /* failure unwinding: each label frees the vector allocated just before the failing step, then falls through */ + scf_vector_free(_asm->symtab); +symtab_error: + scf_vector_free(_asm->labels); +labels_error: + scf_vector_free(_asm->global); +global_error: + scf_vector_free(_asm->data_relas); +data_rela_error: + scf_vector_free(_asm->text_relas); +text_rela_error: + scf_vector_free(_asm->data); +data_error: + scf_vector_free(_asm->text); +text_error: + free(_asm); + return -1; +} + +int scf_asm_close(scf_asm_t* _asm) +{ /* NOTE(review): releases only the context struct; the vectors created in scf_asm_open() are not freed here */ + if (_asm) { + free(_asm); + _asm = NULL; + } + + return 0; +} + +int scf_asm_file(scf_asm_t* _asm, const char* path) +{ + if (!_asm || !path) + return -EINVAL; + + scf_lex_t* lex = _asm->lex_list; + + while (lex) { + if (!strcmp(lex->file->data, path)) + break; + + lex = lex->next; + } + + if (lex) { + _asm->lex = lex; + return 0; + } + + if (scf_lex_open(&_asm->lex, path, NULL) < 0) + return -1; + + _asm->lex->next = _asm->lex_list; + _asm->lex_list = _asm->lex; + + dfa_asm_t* d = _asm->dfa_data; + scf_lex_word_t* w = NULL; + + int ret = 0; + + while (1) { + ret = scf_lex_pop_word(_asm->lex, &w); + if (ret < 0) { + scf_loge("lex pop word failed\n"); + break; + } + + if (SCF_LEX_WORD_EOF == w->type) { + scf_logi("eof\n\n"); + scf_lex_push_word(_asm->lex, w); + w = NULL; + break; + } + + if
(SCF_LEX_WORD_SPACE == w->type) { + scf_lex_word_free(w); + w = NULL; + continue; + } + + if (SCF_LEX_WORD_SEMICOLON == w->type) { + scf_lex_word_free(w); + w = NULL; + continue; + } + + ret = scf_dfa_parse_word(_asm->dfa, w, d); + if (ret < 0) + break; + } + + fclose(_asm->lex->fp); + _asm->lex->fp = NULL; + return ret; +} + +static int __asm_add_text(scf_elf_context_t* elf, scf_asm_t* _asm) +{ + scf_instruction_t* inst; + scf_string_t* text; + int ret; + int i; + + text = scf_string_alloc(); + if (!text) + return -ENOMEM; + + for (i = 0; i < _asm->text->size; i++) { + inst = _asm->text->data[i]; + + inst->offset = text->len; + + if (inst->len > 0) + ret = scf_string_cat_cstr_len(text, inst->code, inst->len); + + else if (inst->bin) + ret = scf_string_cat_cstr_len(text, inst->bin->data, inst->bin->len); + else + continue; + + if (ret < 0) { + scf_string_free(text); + return ret; + } + } + + int end = text->len; + for (i = _asm->text->size - 1; i >= 0; i--) { + inst = _asm->text->data[i]; + + if (inst->label && SCF_LEX_WORD_ID == inst->label->type) { + + ret = scf_symtab_add_sym(_asm->symtab, inst->label->text->data, end - inst->offset, inst->offset, ASM_SHNDX_TEXT, ELF64_ST_INFO(STB_GLOBAL, STT_FUNC)); + if (ret < 0) { + scf_string_free(text); + return ret; + } + + end = inst->offset; + } + } + + scf_elf_section_t cs = { + .name = ".text", + .sh_type = SHT_PROGBITS, + .sh_flags = SHF_ALLOC | SHF_EXECINSTR, + .sh_addralign = 1, + .data = text->data, + .data_len = text->len, + .index = ASM_SHNDX_TEXT, + }; + + ret = scf_elf_add_section(elf, &cs); + + scf_string_free(text); + return ret; +} + +static int __asm_add_data(scf_elf_context_t* elf, scf_asm_t* _asm) +{ + scf_instruction_t* inst; + scf_string_t* data; + int ret; + int i; + + data = scf_string_alloc(); + if (!data) + return -ENOMEM; + + for (i = 0; i < _asm->data->size; i++) { + inst = _asm->data->data[i]; + + inst->offset = data->len; + + if (inst->len > 0) + ret = scf_string_cat_cstr_len(data, inst->code, 
inst->len); + + else if (inst->bin) + ret = scf_string_cat_cstr_len(data, inst->bin->data, inst->bin->len); + else + continue; + + if (ret < 0) { + scf_string_free(data); + return ret; + } + } + + int end = data->len; + for (i = _asm->data->size - 1; i >= 0; i--) { + inst = _asm->data->data[i]; + + if (inst->label && SCF_LEX_WORD_ID == inst->label->type) { + + ret = scf_symtab_add_sym(_asm->symtab, inst->label->text->data, end - inst->offset, inst->offset, ASM_SHNDX_DATA, ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT)); + if (ret < 0) { + scf_string_free(data); + return ret; + } + + end = inst->offset; + } + } + + scf_elf_section_t ds = { + .name = ".data", + .sh_type = SHT_PROGBITS, + .sh_flags = SHF_ALLOC | SHF_WRITE, + .sh_addralign = 8, + .data = data->data, + .data_len = data->len, + .index = ASM_SHNDX_DATA, + }; + + ret = scf_elf_add_section(elf, &ds); + + scf_string_free(data); + return ret; +} + +static int __asm_add_rela_text(scf_elf_context_t* elf, scf_asm_t* _asm) +{ + scf_instruction_t* inst; + scf_vector_t* relas; + scf_rela_t* r; + int ret; + int i; + + relas = scf_vector_alloc(); + if (!relas) + return -ENOMEM; + + for (i = 0; i < _asm->text_relas->size; i++) { + r = _asm->text_relas->data[i]; + + r->text_offset = r->inst->offset + r->inst_offset; + + if (scf_vector_find(_asm->text, inst)) + ret = scf_symtab_add_rela(relas, _asm->symtab, r, r->name->data, ASM_SHNDX_TEXT); + else + ret = scf_symtab_add_rela(relas, _asm->symtab, r, r->name->data, 0); + + if (ret < 0) { + scf_loge("\n"); + goto error; + } + } + + ret = 0; + if (relas->size > 0) { + scf_elf_section_t s = {0}; + + s.name = ".rela.text"; + s.sh_type = SHT_RELA; + s.sh_flags = SHF_INFO_LINK; + s.sh_addralign = 8; + s.data = NULL; + s.data_len = 0; + s.sh_link = 0; + s.sh_info = ASM_SHNDX_TEXT; + + ret = scf_elf_add_rela_section(elf, &s, relas); + } +error: + scf_vector_clear(relas, (void (*)(void*) ) free); + scf_vector_free (relas); + return ret; +} + +static int 
__asm_add_rela_data(scf_elf_context_t* elf, scf_asm_t* _asm) +{ + scf_instruction_t* inst; + scf_vector_t* relas; + scf_rela_t* r; + int ret; + int i; + + relas = scf_vector_alloc(); + if (!relas) + return -ENOMEM; + + for (i = 0; i < _asm->data_relas->size; i++) { + r = _asm->data_relas->data[i]; + + r->text_offset = r->inst->offset + r->inst_offset; + + if (scf_vector_find(_asm->data, inst)) + ret = scf_symtab_add_rela(relas, _asm->symtab, r, r->name->data, ASM_SHNDX_DATA); + else + ret = scf_symtab_add_rela(relas, _asm->symtab, r, r->name->data, 0); + + if (ret < 0) { + scf_loge("\n"); + goto error; + } + } + + ret = 0; + if (relas->size > 0) { + scf_elf_section_t s = {0}; + + s.name = ".rela.text"; + s.sh_type = SHT_RELA; + s.sh_flags = SHF_INFO_LINK; + s.sh_addralign = 8; + s.data = NULL; + s.data_len = 0; + s.sh_link = 0; + s.sh_info = ASM_SHNDX_TEXT; + + ret = scf_elf_add_rela_section(elf, &s, relas); + } +error: + scf_vector_clear(relas, (void (*)(void*) ) free); + scf_vector_free (relas); + return ret; +} + +int scf_asm_to_obj(scf_asm_t* _asm, const char* obj, const char* arch) +{ + scf_lex_t* lex = _asm->lex_list; + + while (lex) { + int ret = scf_symtab_add_sym(_asm->symtab, lex->file->data, 0, 0, SHN_ABS, ELF64_ST_INFO(STB_LOCAL, STT_FILE)); + if (ret < 0) { + scf_loge("\n"); + return ret; + } + + lex = lex->next; + } + + ADD_SECTION_SYMBOL(_asm->symtab, ASM_SHNDX_TEXT, ".text"); + ADD_SECTION_SYMBOL(_asm->symtab, ASM_SHNDX_DATA, ".data"); + + scf_elf_context_t* elf = NULL; + + int ret = scf_elf_open(&elf, arch, obj, "wb"); + if (ret < 0) { + scf_loge("open '%s' elf file '%s' failed\n", arch, obj); + return ret; + } + + ret = __asm_add_text(elf, _asm); + if (ret < 0) + goto error; + + ret = __asm_add_data(elf, _asm); + if (ret < 0) + goto error; + + qsort(_asm->symtab->data, _asm->symtab->size, sizeof(void*), __symtab_sort_cmp); + + ret = __asm_add_rela_text(elf, _asm); + if (ret < 0) + goto error; + + ret = __asm_add_rela_data(elf, _asm); + if (ret < 
0) + goto error; + + scf_elf_sym_t* sym; + int i; + + for (i = 0; i < _asm->symtab->size; i++) { + sym = _asm->symtab->data[i]; + + ret = scf_elf_add_sym(elf, sym, ".symtab"); + if (ret < 0) + goto error; + } + + ret = scf_elf_write_rel(elf); +error: + scf_elf_close(elf); + return ret; +} diff --git a/asm/scf_asm.h b/asm/scf_asm.h new file mode 100644 index 0000000..2bcc75d --- /dev/null +++ b/asm/scf_asm.h @@ -0,0 +1,87 @@ +#ifndef SCF_ASM_H +#define SCF_ASM_H + +#include"scf_lex.h" +#include"scf_dfa.h" +#include"scf_stack.h" +#include"scf_dwarf.h" +#include"scf_native.h" +#include"scf_elf.h" + +typedef struct scf_asm_s scf_asm_t; +typedef struct dfa_asm_s dfa_asm_t; + +#define ASM_SHNDX_TEXT 1 +#define ASM_SHNDX_DATA 2 + +struct scf_asm_s +{ + scf_lex_t* lex_list; + scf_lex_t* lex; + + scf_dfa_t* dfa; + dfa_asm_t* dfa_data; + + scf_vector_t* text; // .text + scf_vector_t* data; // .data + scf_vector_t* current; // point to '.text' or '.data' by asm source code + + scf_vector_t* text_relas; + scf_vector_t* data_relas; + + scf_vector_t* global; // .global + scf_vector_t* labels; + + scf_vector_t* symtab; + scf_dwarf_t* debug; +}; + +struct dfa_asm_s { + void** module_datas; + + scf_lex_word_t* label; + scf_lex_word_t* global; + + scf_OpCode_t* opcode; + scf_inst_data_t operands[4]; + int i; + + int type; + + int n_comma; + int n_lp; + int n_rp; +}; + +int scf_asm_dfa_init(scf_asm_t* _asm); + +int scf_asm_open (scf_asm_t** pasm); +int scf_asm_close (scf_asm_t* _asm); + +int scf_asm_file (scf_asm_t* _asm, const char* path); +int scf_asm_to_obj(scf_asm_t* _asm, const char* obj, const char* arch); + + +static inline int __inst_data_is_reg(scf_inst_data_t* id) +{ + if (!id->flag && id->base && 0 == id->imm_size) + return 1; + return 0; +} + +static inline int __inst_data_is_const(scf_inst_data_t* id) +{ + if (!id->flag && id->imm_size > 0) + return 1; + return 0; +} + +static inline int __lex_word_cmp(const void* v0, const void* v1) +{ + const scf_lex_word_t* w0 = v0; + 
const scf_lex_word_t* w1 = v1; + + return scf_string_cmp(w0->text, w1->text); +} + +#endif diff --git a/asm/scf_dfa_asm.c b/asm/scf_dfa_asm.c new file mode 100644 index 0000000..5519325 --- /dev/null +++ b/asm/scf_dfa_asm.c @@ -0,0 +1,98 @@ +#include"scf_asm.h" +#include"scf_lex_word.h" + +extern scf_dfa_module_t dfa_module_inst; + +static scf_dfa_module_t* asm_dfa_modules[] = +{ + &dfa_module_inst, +}; + +static void* dfa_pop_word(scf_dfa_t* dfa) +{ + scf_asm_t* _asm = dfa->priv; + + scf_lex_word_t* w = NULL; + scf_lex_pop_word(_asm->lex, &w); + return w; +} + +static int dfa_push_word(scf_dfa_t* dfa, void* word) +{ + scf_asm_t* _asm = dfa->priv; + + scf_lex_word_t* w = word; + scf_lex_push_word(_asm->lex, w); + return 0; +} + +static void dfa_free_word(void* word) +{ + scf_lex_word_t* w = word; + scf_lex_word_free(w); +} + +scf_dfa_ops_t dfa_ops_asm = +{ + .name = "asm", + + .pop_word = dfa_pop_word, + .push_word = dfa_push_word, + .free_word = dfa_free_word, +}; + +int scf_asm_dfa_init(scf_asm_t* _asm) +{ + if (scf_dfa_open(&_asm->dfa, &dfa_ops_asm, _asm) < 0) { + scf_loge("\n"); + return -1; + } + + int nb_modules = sizeof(asm_dfa_modules) / sizeof(asm_dfa_modules[0]); + + _asm->dfa_data = calloc(1, sizeof(dfa_asm_t)); + if (!_asm->dfa_data) { + scf_loge("\n"); + return -1; + } + + _asm->dfa_data->module_datas = calloc(nb_modules, sizeof(void*)); + if (!_asm->dfa_data->module_datas) { + scf_loge("\n"); + return -1; + } + + int i; + for (i = 0; i < nb_modules; i++) { + + scf_dfa_module_t* m = asm_dfa_modules[i]; + + if (!m) + continue; + + m->index = i; + + if (!m->init_module) + continue; + + if (m->init_module(_asm->dfa) < 0) { + scf_loge("init module: %s\n", m->name); + return -1; + } + } + + for (i = 0; i < nb_modules; i++) { + + scf_dfa_module_t* m = asm_dfa_modules[i]; + + if (!m || !m->init_syntax) + continue; + + if (m->init_syntax(_asm->dfa) < 0) { + scf_loge("init syntax: %s\n", m->name); + return -1; + } + } + + return 0; +} diff --git 
a/asm/scf_dfa_inst.c b/asm/scf_dfa_inst.c new file mode 100644 index 0000000..4b2d223 --- /dev/null +++ b/asm/scf_dfa_inst.c @@ -0,0 +1,532 @@ +#include"scf_dfa.h" +#include"scf_dfa_util.h" +#include"scf_asm.h" +#include"scf_x64_opcode.h" +#include"scf_x64_reg.h" + +extern scf_dfa_module_t dfa_module_inst; + +static int _inst_is_text(scf_dfa_t* dfa, void* word) +{ + scf_lex_word_t* w = word; + + return SCF_LEX_WORD_ASM_TEXT == w->type; +} + +static int _inst_is_data(scf_dfa_t* dfa, void* word) +{ + scf_lex_word_t* w = word; + + return SCF_LEX_WORD_ASM_DATA == w->type; +} + +static int _inst_is_global(scf_dfa_t* dfa, void* word) +{ + scf_lex_word_t* w = word; + + return SCF_LEX_WORD_ASM_GLOBAL == w->type; +} + +static int _inst_is_asciz(scf_dfa_t* dfa, void* word) +{ + scf_lex_word_t* w = word; + + return SCF_LEX_WORD_ASM_ASCIZ == w->type; +} + +static int _inst_is_str(scf_dfa_t* dfa, void* word) +{ + scf_lex_word_t* w = word; + + return SCF_LEX_WORD_CONST_STRING == w->type; +} + +static int _inst_is_percent(scf_dfa_t* dfa, void* word) +{ + scf_lex_word_t* w = word; + + return SCF_LEX_WORD_MOD == w->type; +} + +static int _inst_is_opcode(scf_dfa_t* dfa, void* word) +{ + scf_lex_word_t* w = word; + + return !!x64_find_OpCode_by_name(w->text->data); +} + +static int _inst_is_reg(scf_dfa_t* dfa, void* word) +{ + scf_lex_word_t* w = word; + + return !!x64_find_register(w->text->data); +} + +static int _inst_action_text(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + + _asm->current = _asm->text; + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_data(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + + _asm->current = _asm->data; + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_global(scf_dfa_t* dfa, scf_vector_t* words, void* data) 
+{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + + d->global = w; + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_asciz(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + + d->type = w->type; + + scf_logi("w: '%s'\n", w->text->data); + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_str(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + scf_instruction_t* inst; + + inst = calloc(1, sizeof(scf_instruction_t)); + if (!inst) + return -ENOMEM; + + int n; + switch (d->type) { + case SCF_LEX_WORD_ASM_ASCIZ: + n = w->data.s->len + 1; + break; + + case SCF_LEX_WORD_ASM_ASCII: + n = w->data.s->len; + break; + default: + scf_loge("const string '%s' MUST be '.asciz' or '.ascii' type, file: %s, line: %d\n", + w->text->data, w->file->data, w->line); + + scf_instruction_free(inst); + return -1; + break; + }; + + if (n <= sizeof(inst->code)) { + memcpy(inst->code, w->data.s->data, n); + inst->len = n; + } else { + inst->bin = scf_string_cstr_len(w->data.s->data, n); + if (!inst->bin) { + scf_instruction_free(inst); + return -ENOMEM; + } + } + + X64_INST_ADD_CHECK(_asm->current, inst); + if (d->label) { + inst->label = d->label; + d ->label = NULL; + } + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_identity(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + scf_lex_word_t* w2; + + if (d->opcode) { + d->operands[d->i].label = w; + d->operands[d->i].flag = 1; + + w = dfa->ops->pop_word(dfa); + dfa->ops->push_word(dfa, w); + + if (SCF_LEX_WORD_LF == w->type || SCF_LEX_WORD_COMMA == w->type) { + if (d->n_lp != d->n_rp) { + scf_loge("'(' not equal to ')' in file: %s, 
line: %d\n", w->file->data, w->line); + return SCF_DFA_ERROR; + } + + d->i++; + } + + } else if (d->global) { + if (scf_vector_find_cmp(_asm->global, w, __lex_word_cmp)) + return SCF_DFA_NEXT_WORD; + + w2 = scf_lex_word_clone(w); + if (!w2) + return SCF_DFA_ERROR; + + int ret = scf_vector_add(_asm->global, w2); + if (ret < 0) { + scf_lex_word_free(w2); + return ret; + } + } + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_opcode(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + + d->opcode = (scf_OpCode_t*)x64_find_OpCode_by_name(w->text->data); + if (!d->opcode) { + scf_loge("opcode '%s' NOT found\n", w->text->data); + return SCF_DFA_ERROR; + } + + d->i = 0; + d->n_comma = 0; + d->n_lp = 0; + d->n_rp = 0; + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_reg(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + scf_register_t* r = x64_find_register(w->text->data); + + if (!r) { + scf_loge("register '%s' NOT found\n", w->text->data); + return SCF_DFA_ERROR; + } + + switch (d->n_comma) { + case 0: + d->operands[d->i].base = r; + break; + case 1: + d->operands[d->i].index = r; + break; + default: + scf_loge("\n"); + return SCF_DFA_ERROR; + break; + }; + + if (d->n_lp == d->n_rp) + d->i++; + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_comma(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + + if (d->n_lp != d->n_rp) + d->n_comma++; + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_colon(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + scf_lex_word_t* w0; + + if (words->size < 2) { + scf_loge("no 
label before '%s', file: %s, line: %d\n", w->text->data, w->file->data, w->line); + return SCF_DFA_ERROR; + } + + w = words->data[words->size - 2]; + + if (SCF_LEX_WORD_ID == w->type) { + w0 = scf_vector_find_cmp(_asm->labels, w, __lex_word_cmp); + if (w0) { + scf_loge("repeated label '%s' in file: %s, line: %d, first in file: %s, line: %d\n", + w->text->data, w->file->data, w->line, w0->file->data, w0->line); + return SCF_DFA_ERROR; + } + + w0 = scf_lex_word_clone(w); + if (!w0) + return -ENOMEM; + + int ret = scf_vector_add(_asm->labels, w0); + if (ret < 0) { + scf_lex_word_free(w0); + return ret; + } + } + + d->label = scf_lex_word_clone(w); + if (!d->label) + return -ENOMEM; + + return SCF_DFA_NEXT_WORD; +} + +static int _inst_action_LF(scf_dfa_t* dfa, scf_vector_t* words, void* data) +{ + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = data; + scf_lex_word_t* w = words->data[words->size - 1]; + + if (d->opcode) { + scf_instruction_t* inst; + scf_x64_OpCode_t* opcode = (scf_x64_OpCode_t*)d->opcode; + scf_inst_data_t* id0; + scf_inst_data_t* id1; + + int OpBytes = opcode->OpBytes; + int RegBytes = opcode->RegBytes; + int EG = opcode->EG; + + if (2 == d->i) { + id0 = &d->operands[0]; + id1 = &d->operands[1]; + + if (__inst_data_is_reg(id0)) { + OpBytes = id0->base->bytes; + + if (__inst_data_is_reg(id1)) { + RegBytes = id1->base->bytes; + + opcode = x64_find_OpCode(d->opcode->type, OpBytes, RegBytes, SCF_X64_E2G); + if (!opcode) { + scf_loge("valid opcode '%s' NOT found, file: %s, line: %d\n", d->opcode->name, w->file->data, w->line); + return SCF_DFA_ERROR; + } + + inst = x64_make_inst_E2G(opcode, id1->base, id0->base); + X64_INST_ADD_CHECK(_asm->current, inst); + + } + + } else if (__inst_data_is_reg(id1)) { + RegBytes = id1->base->bytes; + + opcode = x64_find_OpCode(d->opcode->type, OpBytes, RegBytes, SCF_X64_E2G); + if (!opcode) { + scf_loge("valid opcode '%s' NOT found, file: %s, line: %d\n", d->opcode->name, w->file->data, w->line); + return SCF_DFA_ERROR; + 
} + + scf_rela_t* rela = NULL; + + inst = x64_make_inst_L2G(&rela, opcode, id1->base); + X64_INST_ADD_CHECK(_asm->current, inst); + X64_RELA_ADD_LABEL(_asm->text_relas, rela, inst, id0->label->text); + } + + } else if (1 == d->i) { + id0 = &d->operands[0]; + + if (__inst_data_is_reg(id0)) { + OpBytes = id0->base->bytes; + + opcode = x64_find_OpCode(d->opcode->type, OpBytes, RegBytes, SCF_X64_G); + if (!opcode) { + opcode = x64_find_OpCode(d->opcode->type, OpBytes, RegBytes, SCF_X64_E); + if (!opcode) { + scf_loge("valid opcode '%s' NOT found, file: %s, line: %d\n", d->opcode->name, w->file->data, w->line); + return SCF_DFA_ERROR; + } + + inst = x64_make_inst_E(opcode, id0->base); + } else + inst = x64_make_inst_G(opcode, id0->base); + X64_INST_ADD_CHECK(_asm->current, inst); + + } else { + scf_rela_t* rela = NULL; + uint32_t disp = 0; + + if (SCF_X64_CALL == d->opcode->type) { + opcode = x64_find_OpCode(d->opcode->type, 4, 4, SCF_X64_I); + inst = x64_make_inst_I(opcode, (uint8_t*)&disp, 4); + X64_INST_ADD_CHECK(_asm->current, inst); + + rela = calloc(1, sizeof(scf_rela_t)); + if (!rela) + return -ENOMEM; + + rela->inst_offset = 1; + X64_RELA_ADD_LABEL(_asm->text_relas, rela, inst, id0->label->text); + } else { + opcode = x64_find_OpCode(d->opcode->type, OpBytes, RegBytes, SCF_X64_E); + if (!opcode) { + scf_loge("valid opcode '%s' NOT found, file: %s, line: %d\n", d->opcode->name, w->file->data, w->line); + return SCF_DFA_ERROR; + } + + inst = x64_make_inst_L(&rela, opcode); + X64_INST_ADD_CHECK(_asm->current, inst); + X64_RELA_ADD_LABEL(_asm->text_relas, rela, inst, id0->label->text); + } + } + } else { + inst = x64_make_inst(opcode, 8); + X64_INST_ADD_CHECK(_asm->current, inst); + } + + if (d->label) { + inst->label = d->label; + d ->label = NULL; + } + + scf_instruction_print(inst); + d->opcode = NULL; + } + + for (int i = 0; i < 4; i++) { + d->operands[i].base = NULL; + d->operands[i].index = NULL; + d->operands[i].scale = 0; + d->operands[i].disp = 0; + + 
d->operands[i].flag = 0; + d->operands[i].label = NULL; + d->operands[i].imm = 0; + d->operands[i].imm_size = 0; + } + + d->global = NULL; + + return SCF_DFA_OK; +} + +static int _dfa_init_module_inst(scf_dfa_t* dfa) +{ + SCF_DFA_MODULE_NODE(dfa, inst, text, _inst_is_text, _inst_action_text); + SCF_DFA_MODULE_NODE(dfa, inst, data, _inst_is_data, _inst_action_data); + SCF_DFA_MODULE_NODE(dfa, inst, global, _inst_is_global, _inst_action_global); + + SCF_DFA_MODULE_NODE(dfa, inst, asciz, _inst_is_asciz, _inst_action_asciz); + SCF_DFA_MODULE_NODE(dfa, inst, str, _inst_is_str, _inst_action_str); + + SCF_DFA_MODULE_NODE(dfa, inst, identity, scf_dfa_is_identity, _inst_action_identity); + SCF_DFA_MODULE_NODE(dfa, inst, colon, scf_dfa_is_colon, _inst_action_colon); + + SCF_DFA_MODULE_NODE(dfa, inst, opcode, _inst_is_opcode, _inst_action_opcode); + SCF_DFA_MODULE_NODE(dfa, inst, reg, _inst_is_reg, _inst_action_reg); + SCF_DFA_MODULE_NODE(dfa, inst, percent, _inst_is_percent, scf_dfa_action_next); + + SCF_DFA_MODULE_NODE(dfa, inst, comma, scf_dfa_is_comma, _inst_action_comma); + SCF_DFA_MODULE_NODE(dfa, inst, LF, scf_dfa_is_LF, _inst_action_LF); + + scf_asm_t* _asm = dfa->priv; + dfa_asm_t* d = _asm->dfa_data; + + d->type = SCF_LEX_WORD_ASM_BYTE; + + return SCF_DFA_OK; +} + +static int _dfa_init_syntax_inst(scf_dfa_t* dfa) +{ + SCF_DFA_GET_MODULE_NODE(dfa, inst, text, text); + SCF_DFA_GET_MODULE_NODE(dfa, inst, data, data); + SCF_DFA_GET_MODULE_NODE(dfa, inst, global, global); + + SCF_DFA_GET_MODULE_NODE(dfa, inst, identity, identity); + SCF_DFA_GET_MODULE_NODE(dfa, inst, colon, colon); + + SCF_DFA_GET_MODULE_NODE(dfa, inst, asciz, asciz); + SCF_DFA_GET_MODULE_NODE(dfa, inst, str, str); + + SCF_DFA_GET_MODULE_NODE(dfa, inst, opcode, opcode); + SCF_DFA_GET_MODULE_NODE(dfa, inst, reg, reg); + SCF_DFA_GET_MODULE_NODE(dfa, inst, percent, percent); + + SCF_DFA_GET_MODULE_NODE(dfa, inst, comma, comma); + SCF_DFA_GET_MODULE_NODE(dfa, inst, LF, LF); + + // asm syntax + 
scf_vector_add(dfa->syntaxes, text); + scf_vector_add(dfa->syntaxes, data); + scf_vector_add(dfa->syntaxes, global); + + scf_vector_add(dfa->syntaxes, opcode); + scf_vector_add(dfa->syntaxes, identity); + + scf_vector_add(dfa->syntaxes, asciz); + scf_vector_add(dfa->syntaxes, LF); + + // .text .data + scf_dfa_node_add_child(text, LF); + scf_dfa_node_add_child(data, LF); + + // .global + scf_dfa_node_add_child(global, identity); + scf_dfa_node_add_child(identity, comma); + scf_dfa_node_add_child(comma, identity); + scf_dfa_node_add_child(identity, LF); + scf_dfa_node_add_child(comma, LF); + scf_dfa_node_add_child(global, LF); + + // label: + scf_dfa_node_add_child(identity, colon); + scf_dfa_node_add_child(colon, LF); + + // .asciz + scf_dfa_node_add_child(asciz, str); + scf_dfa_node_add_child(str, LF); + + // asm instruction + scf_dfa_node_add_child(opcode, identity); + + scf_dfa_node_add_child(opcode, reg); + scf_dfa_node_add_child(reg, comma); + scf_dfa_node_add_child(comma, reg); + scf_dfa_node_add_child(reg, LF); + + // %reg of 'AT & T' + scf_dfa_node_add_child(percent, reg); + scf_dfa_node_add_child(opcode, percent); + scf_dfa_node_add_child(comma, percent); + + return SCF_DFA_OK; +} + +scf_dfa_module_t dfa_module_inst = +{ + .name = "inst", + .init_module = _dfa_init_module_inst, + .init_syntax = _dfa_init_syntax_inst, +}; diff --git a/core/scf_lex_word.h b/core/scf_lex_word.h index 543aae6..0e21194 100644 --- a/core/scf_lex_word.h +++ b/core/scf_lex_word.h @@ -112,6 +112,21 @@ enum scf_lex_words SCF_LEX_WORD_KEY_DEFINE, // #define SCF_LEX_WORD_KEY_ENDIF, // #endif + // asm key word + SCF_LEX_WORD_ASM_TEXT, // .text + SCF_LEX_WORD_ASM_DATA, // .data + SCF_LEX_WORD_ASM_GLOBAL, // .global + SCF_LEX_WORD_ASM_ALIGN, // .align + SCF_LEX_WORD_ASM_ORG, // .org + + SCF_LEX_WORD_ASM_FILL, // .fill + SCF_LEX_WORD_ASM_BYTE, // .byte + SCF_LEX_WORD_ASM_WORD, // .word + SCF_LEX_WORD_ASM_LONG, // .long + SCF_LEX_WORD_ASM_QUAD, // .quad + SCF_LEX_WORD_ASM_ASCII, // .ascii + 
SCF_LEX_WORD_ASM_ASCIZ, // .asciz + // data types SCF_LEX_WORD_KEY_CHAR, // char diff --git a/docs/Naja_sys.txt b/docs/Naja_sys.txt index fe5a334..3713b28 100644 --- a/docs/Naja_sys.txt +++ b/docs/Naja_sys.txt @@ -131,8 +131,9 @@ smov sr, rd // D = 1, rd --> sr 62, iret, opcode = 62 ------------------------------------------------------------------------------------------------ |31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0| -| 1 1| 1 1 1 0| 0| 0 0 0 0| 0| 0 0| 0 0| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0| +| 1 1| 1 1 1 0| 0| 0 0 0 0| 0| 0 0| D 0| 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0| // S = 1 -iret +iret // D = 0, +syscall // D = 1, ------------------------------------------------------------------------------------------------ diff --git a/elf/scf_elf.h b/elf/scf_elf.h index cd3a606..c810fc9 100644 --- a/elf/scf_elf.h +++ b/elf/scf_elf.h @@ -92,8 +92,7 @@ int scf_elf_open (scf_elf_context_t** pelf, const char* machine, const char* pat int scf_elf_open2(scf_elf_context_t* elf, const char* machine); int scf_elf_close(scf_elf_context_t* elf); -int scf_elf_add_sym (scf_elf_context_t* elf, const scf_elf_sym_t* sym, const char* sh_name); - +int scf_elf_add_sym (scf_elf_context_t* elf, const scf_elf_sym_t* sym, const char* sh_name); int scf_elf_add_section(scf_elf_context_t* elf, const scf_elf_section_t* section); int scf_elf_add_rela_section(scf_elf_context_t* elf, const scf_elf_section_t* section, scf_vector_t* relas); diff --git a/lex/scf_lex.c b/lex/scf_lex.c index 923b543..5c8a2ff 100644 --- a/lex/scf_lex.c +++ b/lex/scf_lex.c @@ -79,6 +79,20 @@ static scf_key_word_t key_words[] = {"enum", SCF_LEX_WORD_KEY_ENUM}, {"union", SCF_LEX_WORD_KEY_UNION}, {"struct", SCF_LEX_WORD_KEY_STRUCT}, + + {".text", SCF_LEX_WORD_ASM_TEXT}, + {".data", SCF_LEX_WORD_ASM_DATA}, + {".global", SCF_LEX_WORD_ASM_GLOBAL}, + {".align", SCF_LEX_WORD_ASM_ALIGN}, + {".org", SCF_LEX_WORD_ASM_ORG}, + + {".fill", SCF_LEX_WORD_ASM_FILL}, + {".byte", 
SCF_LEX_WORD_ASM_BYTE}, + {".word", SCF_LEX_WORD_ASM_WORD}, + {".long", SCF_LEX_WORD_ASM_LONG}, + {".quad", SCF_LEX_WORD_ASM_QUAD}, + {".ascii", SCF_LEX_WORD_ASM_ASCII}, + {".asciz", SCF_LEX_WORD_ASM_ASCIZ}, }; static scf_escape_char_t escape_chars[] = @@ -89,7 +103,7 @@ static scf_escape_char_t escape_chars[] = {'0', '\0'}, }; -static int _find_key_word(const char* text) +int _find_key_word(const char* text) { int i; for (i = 0; i < sizeof(key_words) / sizeof(key_words[0]); i++) { @@ -114,7 +128,7 @@ static int _find_escape_char(const int c) return c; } -int scf_lex_open(scf_lex_t** plex, const char* path) +int scf_lex_open(scf_lex_t** plex, const char* path, scf_string_t* text) { if (!plex || !path) return -EINVAL; @@ -123,24 +137,35 @@ int scf_lex_open(scf_lex_t** plex, const char* path) if (!lex) return -ENOMEM; - lex->fp = fopen(path, "r"); - if (!lex->fp) { - - char cwd[4096]; - getcwd(cwd, 4095); - scf_loge("open file '%s' failed, errno: %d, default path dir: %s\n", path, errno, cwd); - - free(lex); - return -1; - } - lex->file = scf_string_cstr(path); if (!lex->file) { - fclose(lex->fp); free(lex); return -ENOMEM; } + if (!text) { + lex->fp = fopen(path, "r"); + if (!lex->fp) { + char cwd[4096]; + getcwd(cwd, 4095); + scf_loge("open file '%s' failed, errno: %d, default path dir: %s\n", path, errno, cwd); + + scf_string_free(lex->file); + free(lex); + return -1; + } + + size_t len = strlen(path); + + if (len > 2) { + if ('.' 
== path[len - 2] && 's' == (0x20 | path[len - 1])) + lex->asm_flag = 1; + } + } else { + lex->text = text; + lex->asm_flag = 1; + } + lex->nb_lines = 1; *plex = lex; @@ -160,7 +185,8 @@ int scf_lex_close(scf_lex_t* lex) scf_string_free(lex->file); - fclose(lex->fp); + if (lex->fp) + fclose(lex->fp); free(lex); } return 0; @@ -657,7 +683,8 @@ int __lex_pop_word(scf_lex_t* lex, scf_lex_word_t** pword) lex->nb_lines++; lex->pos = 0; - if (SCF_UTF8_LF == c->flag) { + if (SCF_UTF8_LF == c->flag || lex->asm_flag) + { w = scf_lex_word_alloc(lex->file, lex->nb_lines, lex->pos, SCF_LEX_WORD_LF); w->text = scf_string_cstr("LF"); *pword = w; diff --git a/lex/scf_lex.h b/lex/scf_lex.h index ebb946a..d875be6 100644 --- a/lex/scf_lex.h +++ b/lex/scf_lex.h @@ -37,11 +37,16 @@ struct scf_lex_s scf_vector_t* macros; - FILE* fp; // file pointer to the code + FILE* fp; // file pointer to the code + + scf_string_t* text; // text string to the code, 'fp' & 'text' only one is used + int text_i; scf_string_t* file; // original code file name int nb_lines; int pos; + + uint8_t asm_flag:1; }; @@ -49,7 +54,7 @@ scf_char_t* _lex_pop_char (scf_lex_t* lex); void _lex_push_char(scf_lex_t* lex, scf_char_t* c); -int scf_lex_open (scf_lex_t** plex, const char* path); +int scf_lex_open (scf_lex_t** plex, const char* path, scf_string_t* text); int scf_lex_close(scf_lex_t* lex); void scf_lex_push_word(scf_lex_t* lex, scf_lex_word_t* word); diff --git a/lex/scf_lex_test.c b/lex/scf_lex_test.c index 8f94494..28e8223 100644 --- a/lex/scf_lex_test.c +++ b/lex/scf_lex_test.c @@ -4,7 +4,9 @@ int main(int argc, char* argv[]) { scf_lex_t* lex = NULL; - if (scf_lex_open(&lex, argv[1]) < 0) { + scf_string_t* text = scf_string_cstr("document.write(\"hello js\n\")"); + + if (scf_lex_open(&lex, argv[1], text) < 0) { scf_loge("\n"); return -1; } diff --git a/lex/scf_lex_util.c b/lex/scf_lex_util.c index 1379a8e..f18756f 100644 --- a/lex/scf_lex_util.c +++ b/lex/scf_lex_util.c @@ -1,9 +1,20 @@ #include"scf_lex.h" 
+int _find_key_word(const char* text); + +static int __lex_getc(scf_lex_t* lex) +{ + if (lex->fp) + return fgetc(lex->fp); + + if (lex->text_i < lex->text->len) + return (unsigned char)lex->text->data[lex->text_i++]; /* same contract as fgetc(): a byte value 0..255, never negative */ + return EOF; +} + scf_char_t* _lex_pop_char(scf_lex_t* lex) { assert(lex); - assert(lex->fp); scf_char_t* c; @@ -17,7 +28,7 @@ scf_char_t* _lex_pop_char(scf_lex_t* lex) if (!c) return NULL; - int ret = fgetc(lex->fp); + int ret = __lex_getc(lex); if (EOF == ret) { c->c = ret; return c; @@ -60,7 +71,7 @@ scf_char_t* _lex_pop_char(scf_lex_t* lex) int i; for (i = 1; i < c->len; i++) { - ret = fgetc(lex->fp); + ret = __lex_getc(lex); if (0x2 == (ret >> 6)) { c->c <<= 6; @@ -537,6 +548,47 @@ int _lex_dot(scf_lex_t* lex, scf_lex_word_t** pword, scf_char_t* c0) } } else { + if (lex->asm_flag + && 'a' <= (c1->c | 0x20) + && 'z' >= (c1->c | 0x20)) { + + do { + scf_string_cat_cstr_len(s, c1->utf8, 1); + lex->pos++; + + free(c1); + c1 = _lex_pop_char(lex); + + if (!c1) { + scf_string_free(s); + return -ENOMEM; + } + } while ('a' <= (c1->c | 0x20) && 'z' >= (c1->c | 0x20)); + + _lex_push_char(lex, c1); + c1 = NULL; + + int type = _find_key_word(s->data); + if (type < 0) { + scf_loge("unknown asm key word '%s', file: %s, line: %d\n", s->data, lex->file->data, lex->nb_lines); + + scf_string_free(s); + return -EINVAL; + } + + w = scf_lex_word_alloc(lex->file, lex->nb_lines, lex->pos, type); + if (!w) { + scf_string_free(s); + return -ENOMEM; + } + + w->text = s; + s = NULL; + + *pword = w; + return 0; + } + w = scf_lex_word_alloc(lex->file, lex->nb_lines, lex->pos, SCF_LEX_WORD_DOT); w->text = s; s = NULL; diff --git a/native/scf_instruction.c b/native/scf_instruction.c new file mode 100644 index 0000000..92519a8 --- /dev/null +++ b/native/scf_instruction.c @@ -0,0 +1,76 @@ +#include"scf_instruction.h" + +void scf_rela_free(scf_rela_t* rela) +{ + if (rela) { + if (rela->name) + scf_string_free(rela->name); + + free(rela); + } +} + +void scf_instruction_free(scf_instruction_t* 
inst) +{ + if (inst) { + if (inst->label) + scf_lex_word_free(inst->label); + + if (inst->bin) + scf_string_free(inst->bin); + + free(inst); + } +} + +void scf_instruction_print(scf_instruction_t* inst) +{ + if (inst->label) + printf("%s: ", inst->label->text->data); + + if (inst->OpCode) + printf("%s ", inst->OpCode->name); + + if (1 == inst->src.flag) { + if (inst->src.index) + printf("%d(%s, %s, %d), ", inst->src.disp, inst->src.base->name, + inst->src.index->name, inst->src.scale); + + else if (inst->src.base) { + if (inst->src.disp < 0) + printf("-%#x(%s), ", -inst->src.disp, inst->src.base->name); + else + printf("%#x(%s), ", inst->src.disp, inst->src.base->name); + } else + printf("%d(rip), ", inst->src.disp); /* RIP-relative source operand: print the source displacement */ + + } else if (inst->src.base) + printf("%s, ", inst->src.base->name); + + else if (inst->src.imm_size > 0) + printf("%d, ", (int)inst->src.imm); + + if (1 == inst->dst.flag) { + if (inst->dst.index) + printf("%d(%s, %s, %d)", inst->dst.disp, inst->dst.base->name, + inst->dst.index->name, inst->dst.scale); + + else if (inst->dst.base) { + if (inst->dst.disp < 0) + printf("-%#x(%s)", -inst->dst.disp, inst->dst.base->name); + else + printf("%#x(%s)", inst->dst.disp, inst->dst.base->name); + } else + printf("%d(rip)", inst->dst.disp); + + } else if (inst->dst.base) + printf("%s", inst->dst.base->name); + + else if (inst->dst.imm_size > 0) + printf("%d", (int)inst->dst.imm); + + int i; + for (i = 0; i < inst->len; i++) + printf(" %#x", inst->code[i]); + printf("\n"); +} diff --git a/native/scf_instruction.h b/native/scf_instruction.h new file mode 100644 index 0000000..bd08f01 --- /dev/null +++ b/native/scf_instruction.h @@ -0,0 +1,113 @@ +#ifndef SCF_INSTRUCTION_H +#define SCF_INSTRUCTION_H + +#include"scf_lex_word.h" +#include"scf_core_types.h" + +typedef struct scf_instruction_s scf_instruction_t; + +struct scf_register_s +{ + uint32_t id; + int bytes; + char* name; + + intptr_t color; + + scf_vector_t* dag_nodes; + + uint32_t updated; + uint32_t used; 
+}; +#define SCF_COLOR_CONFLICT(c0, c1) ((c0) >> 16 == (c1) >> 16 && (c0) & (c1) & 0xffff) + +struct scf_OpCode_s +{ + int type; + char* name; +}; + +typedef struct { + scf_register_t* base; + scf_register_t* index; + + int32_t scale; + int32_t disp; + int32_t size; +} scf_sib_t; + +typedef struct { + scf_register_t* base; + scf_register_t* index; + int scale; + int disp; + + scf_lex_word_t* label; + uint64_t imm; + int imm_size; + + uint8_t flag; +} scf_inst_data_t; + +struct scf_instruction_s +{ + scf_3ac_code_t* c; + + scf_OpCode_t* OpCode; + scf_instruction_t* next; // only for jcc, jmp, call + + scf_inst_data_t src; + scf_inst_data_t dst; + + scf_lex_word_t* label; // asm label + scf_string_t* bin; // asm binary data, maybe in .text or .data + int offset; // asm offset, maybe in .text or .data + + int len; + uint8_t code[32]; + + int nb_used; +}; + +typedef struct { + scf_3ac_code_t* code; // related 3ac code + scf_function_t* func; + scf_variable_t* var; + scf_string_t* name; + + scf_instruction_t* inst; + int inst_offset; // byte offset in instruction + int64_t text_offset; // byte offset in .text segment + uint64_t type; + int addend; +} scf_rela_t; + +static inline int scf_inst_data_same(scf_inst_data_t* id0, scf_inst_data_t* id1) +{ + // global var, are considered as different. 
+ if ((id0->flag && !id0->base) || (id1->flag && !id1->base)) + return 0; + + if (id0->scale == id1->scale + && id0->disp == id1->disp + && id0->flag == id1->flag + && id0->imm == id1->imm + && id0->imm_size == id1->imm_size) { + + if (id0->base == id1->base + || (id0->base && id1->base && SCF_COLOR_CONFLICT(id0->base->color, id1->base->color))) { + + if (id0->index == id1->index + || (id0->index && id1->index && SCF_COLOR_CONFLICT(id0->index->color, id1->index->color))) + return 1; + } + } + return 0; +} + +void scf_rela_free(scf_rela_t* rela); + +void scf_instruction_free (scf_instruction_t* inst); +void scf_instruction_print(scf_instruction_t* inst); + +#endif diff --git a/native/scf_native.c b/native/scf_native.c index 6366eb7..546d461 100644 --- a/native/scf_native.c +++ b/native/scf_native.c @@ -4,52 +4,6 @@ extern scf_native_ops_t native_ops_x64; extern scf_native_ops_t native_ops_risc; extern scf_native_ops_t native_ops_eda; -void scf_instruction_print(scf_instruction_t* inst) -{ - if (inst->OpCode) - printf("%s ", inst->OpCode->name); - - if (1 == inst->src.flag) { - if (inst->src.index) - printf("%d(%s, %s, %d), ", inst->src.disp, inst->src.base->name, - inst->src.index->name, inst->src.scale); - - else if (inst->src.base) { - if (inst->src.disp < 0) - printf("-%#x(%s), ", -inst->src.disp, inst->src.base->name); - else - printf("%#x(%s), ", inst->src.disp, inst->src.base->name); - } else - printf("%d(rip), ", inst->dst.disp); - - } else if (inst->src.base) - printf("%s, ", inst->src.base->name); - - else if (inst->src.imm_size > 0) - printf("%d, ", (int)inst->src.imm); - - if (1 == inst->dst.flag) { - if (inst->dst.index) - printf("%d(%s, %s, %d), ", inst->dst.disp, inst->dst.base->name, - inst->dst.index->name, inst->dst.scale); - - else if (inst->dst.base) { - if (inst->dst.disp < 0) - printf("-%#x(%s), ", -inst->dst.disp, inst->dst.base->name); - else - printf("%#x(%s), ", inst->dst.disp, inst->dst.base->name); - } else - printf("%d(rip), ", 
inst->dst.disp); - - } else if (inst->dst.base) - printf("%s, ", inst->dst.base->name); - - else if (inst->dst.imm_size > 0) - printf("%d, ", (int)inst->dst.imm); - - printf("\n"); -} - int scf_native_open(scf_native_t** pctx, const char* name) { scf_native_t* ctx = calloc(1, sizeof(scf_native_t)); @@ -98,4 +52,3 @@ int scf_native_select_inst(scf_native_t* ctx, scf_function_t* f) printf("%s(),%d, error: \n", __func__, __LINE__); return -1; } - diff --git a/native/scf_native.h b/native/scf_native.h index f095717..29c1626 100644 --- a/native/scf_native.h +++ b/native/scf_native.h @@ -3,78 +3,9 @@ #include"scf_3ac.h" #include"scf_parse.h" +#include"scf_instruction.h" -typedef struct scf_native_ops_s scf_native_ops_t; - -struct scf_register_s -{ - uint32_t id; - int bytes; - char* name; - - intptr_t color; - - scf_vector_t* dag_nodes; - - uint32_t updated; - uint32_t used; -}; -#define SCF_COLOR_CONFLICT(c0, c1) ((c0) >> 16 == (c1) >> 16 && (c0) & (c1) & 0xffff) - -struct scf_OpCode_s -{ - int type; - char* name; -}; - -typedef struct { - scf_register_t* base; - scf_register_t* index; - - int32_t scale; - int32_t disp; - int32_t size; -} scf_sib_t; - -typedef struct { - scf_register_t* base; - scf_register_t* index; - int scale; - int disp; - - uint64_t imm; - int imm_size; - - uint8_t flag; -} scf_inst_data_t; - -typedef struct { - scf_3ac_code_t* c; - - scf_OpCode_t* OpCode; - - scf_inst_data_t src; - scf_inst_data_t dst; - - uint8_t code[32]; - int len; - - int nb_used; - -} scf_instruction_t; - -typedef struct { - scf_3ac_code_t* code; // related 3ac code - scf_function_t* func; - scf_variable_t* var; - scf_string_t* name; - - scf_instruction_t* inst; - int inst_offset; // byte offset in instruction - int64_t text_offset; // byte offset in .text segment - uint64_t type; - int addend; -} scf_rela_t; +typedef struct scf_native_ops_s scf_native_ops_t; typedef struct { scf_native_ops_t* ops; @@ -239,31 +170,6 @@ struct scf_inst_ops_s void 
(*set_jmp_offset)(scf_instruction_t* inst, int32_t bytes); }; -static inline int scf_inst_data_same(scf_inst_data_t* id0, scf_inst_data_t* id1) -{ - // global var, are considered as different. - if ((id0->flag && !id0->base) || (id1->flag && !id1->base)) - return 0; - - if (id0->scale == id1->scale - && id0->disp == id1->disp - && id0->flag == id1->flag - && id0->imm == id1->imm - && id0->imm_size == id1->imm_size) { - - if (id0->base == id1->base - || (id0->base && id1->base && SCF_COLOR_CONFLICT(id0->base->color, id1->base->color))) { - - if (id0->index == id1->index - || (id0->index && id1->index && SCF_COLOR_CONFLICT(id0->index->color, id1->index->color))) - return 1; - } - } - return 0; -} - -void scf_instruction_print(scf_instruction_t* inst); - int scf_native_open(scf_native_t** pctx, const char* name); int scf_native_close(scf_native_t* ctx); diff --git a/native/x64/scf_x64.h b/native/x64/scf_x64.h index 6ce1a69..e30405c 100644 --- a/native/x64/scf_x64.h +++ b/native/x64/scf_x64.h @@ -8,38 +8,6 @@ #include"scf_graph.h" #include"scf_elf.h" -#define X64_INST_ADD_CHECK(vec, inst) \ - do { \ - if (!(inst)) { \ - scf_loge("\n"); \ - return -ENOMEM; \ - } \ - int ret = scf_vector_add((vec), (inst)); \ - if (ret < 0) { \ - scf_loge("\n"); \ - free(inst); \ - return ret; \ - } \ - } while (0) - -#define X64_RELA_ADD_CHECK(vec, rela, c, v, f) \ - do { \ - if (rela) { \ - (rela)->code = (c); \ - (rela)->var = (v); \ - (rela)->func = (f); \ - (rela)->inst = (c)->instructions->data[(c)->instructions->size - 1]; \ - (rela)->addend = -4; \ - (rela)->type = R_X86_64_PC32; \ - int ret = scf_vector_add((vec), (rela)); \ - if (ret < 0) { \ - scf_loge("\n"); \ - free(rela); \ - return ret; \ - } \ - } \ - } while (0) - #define X64_PEEPHOLE_DEL 1 #define X64_PEEPHOLE_OK 0 @@ -90,43 +58,15 @@ int x64_load_bb_colors (scf_basic_block_t* bb, scf_bb_group_t* bbg, scf_function int x64_load_bb_colors2(scf_basic_block_t* bb, scf_bb_group_t* bbg, scf_function_t* f); int 
x64_init_bb_colors (scf_basic_block_t* bb); - -scf_instruction_t* x64_make_inst (scf_x64_OpCode_t* OpCode, int size); -scf_instruction_t* x64_make_inst_G(scf_x64_OpCode_t* OpCode, scf_register_t* r); -scf_instruction_t* x64_make_inst_E(scf_x64_OpCode_t* OpCode, scf_register_t* r); -scf_instruction_t* x64_make_inst_I(scf_x64_OpCode_t* OpCode, uint8_t* imm, int size); -void x64_make_inst_I2(scf_instruction_t* inst, scf_x64_OpCode_t* OpCode, uint8_t* imm, int size); - -scf_instruction_t* x64_make_inst_I2G(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, uint8_t* imm, int size); -scf_instruction_t* x64_make_inst_I2E(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, uint8_t* imm, int size); - scf_instruction_t* x64_make_inst_M (scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_variable_t* v, scf_register_t* r_base); scf_instruction_t* x64_make_inst_I2M(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_variable_t* v_dst, scf_register_t* r_base, uint8_t* imm, int32_t size); scf_instruction_t* x64_make_inst_G2M(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_variable_t* v_dst, scf_register_t* r_base, scf_register_t* r_src); scf_instruction_t* x64_make_inst_M2G(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, scf_register_t* r_base, scf_variable_t* v_src); -scf_instruction_t* x64_make_inst_G2E(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, scf_register_t* r_src); -scf_instruction_t* x64_make_inst_E2G(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, scf_register_t* r_src); - -scf_instruction_t* x64_make_inst_P2G(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, scf_register_t* r_base, int32_t offset); -scf_instruction_t* x64_make_inst_G2P(scf_x64_OpCode_t* OpCode, scf_register_t* r_base, int32_t offset, scf_register_t* r_src); -scf_instruction_t* x64_make_inst_I2P(scf_x64_OpCode_t* OpCode, scf_register_t* r_base, int32_t offset, uint8_t* imm, int size); - -scf_instruction_t* x64_make_inst_SIB2G(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, 
scf_register_t* r_base, scf_register_t* r_index, int32_t scale, int32_t disp); -scf_instruction_t* x64_make_inst_G2SIB(scf_x64_OpCode_t* OpCode, scf_register_t* r_base, scf_register_t* r_index, int32_t scale, int32_t disp, scf_register_t* r_src); -scf_instruction_t* x64_make_inst_I2SIB(scf_x64_OpCode_t* OpCode, scf_register_t* r_base, scf_register_t* r_index, int32_t scale, int32_t disp, uint8_t* imm, int32_t size); - -scf_instruction_t* x64_make_inst_SIB(scf_x64_OpCode_t* OpCode, scf_register_t* r_base, scf_register_t* r_index, int32_t scale, int32_t disp, int size); -scf_instruction_t* x64_make_inst_P (scf_x64_OpCode_t* OpCode, scf_register_t* r_base, int32_t offset, int size); - int x64_float_OpCode_type(int OpCode_type, int var_type); - -int x64_shift(scf_native_t* ctx, scf_3ac_code_t* c, int OpCode_type); - -int x64_shift_assign(scf_native_t* ctx, scf_3ac_code_t* c, int OpCode_type); - - +int x64_shift (scf_native_t* ctx, scf_3ac_code_t* c, int OpCode_type); +int x64_shift_assign (scf_native_t* ctx, scf_3ac_code_t* c, int OpCode_type); int x64_binary_assign(scf_native_t* ctx, scf_3ac_code_t* c, int OpCode_type); int x64_assign_dereference(scf_native_t* ctx, scf_3ac_code_t* c); diff --git a/native/x64/scf_x64_inst_util.c b/native/x64/scf_x64_inst_util.c index 6074843..33d8266 100644 --- a/native/x64/scf_x64_inst_util.c +++ b/native/x64/scf_x64_inst_util.c @@ -315,6 +315,46 @@ scf_instruction_t* x64_make_inst_I2E(scf_x64_OpCode_t* OpCode, scf_register_t* r return inst; } +scf_instruction_t* x64_make_inst_L(scf_rela_t** prela, scf_x64_OpCode_t* OpCode) +{ + uint8_t reg = 0; + + if (OpCode->ModRM_OpCode_used) + reg = OpCode->ModRM_OpCode; + + scf_instruction_t* inst = _x64_make_OpCode(OpCode, OpCode->OpBytes, NULL, NULL, NULL); + if (!inst) + return NULL; + + if (_x64_make_disp(prela, inst, reg, -1, 0) < 0) { + free(inst); + return NULL; + } + + if (SCF_X64_INC == OpCode->type || SCF_X64_DEC == OpCode->type) { /* inc/dec: the memory operand is marked as both source and destination */ + + inst->src.base = NULL; + inst->src.disp = 0; + 
inst->src.flag = 1; + + inst->dst.base = NULL; + inst->dst.disp = 0; + inst->dst.flag = 1; + + } else if (SCF_X64_MUL == OpCode->type + || SCF_X64_DIV == OpCode->type + || SCF_X64_IMUL == OpCode->type + || SCF_X64_IDIV == OpCode->type + || SCF_X64_CALL == OpCode->type) { + + inst->src.base = NULL; + inst->src.disp = 0; + inst->src.flag = 1; + } + + return inst; +} + scf_instruction_t* x64_make_inst_M(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_variable_t* v, scf_register_t* r_base) { scf_register_t* rbp = x64_find_register("rbp"); @@ -382,6 +422,42 @@ scf_instruction_t* x64_make_inst_M(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, return inst; } +scf_instruction_t* x64_make_inst_I2L(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, uint8_t* imm, int32_t size) +{ + uint8_t reg = 0; + + if (OpCode->ModRM_OpCode_used) + reg = OpCode->ModRM_OpCode; + + scf_instruction_t* inst = _x64_make_OpCode(OpCode, OpCode->OpBytes, NULL, NULL, NULL); + if (!inst) + return NULL; + + if (_x64_make_disp(prela, inst, reg, -1, 0) < 0) { + free(inst); + return NULL; + } + + size = size > OpCode->OpBytes ? 
OpCode->OpBytes : size; + + uint8_t* p = (uint8_t*)&inst->src.imm; + int i; + + inst->src.imm = 0; + + for (i = 0; i < size; i++) { + inst->code[inst->len++] = imm[i]; + p[i] = imm[i]; + } + + inst->dst.base = NULL; + inst->dst.disp = 0; + inst->dst.flag = 1; + + inst->src.imm_size = size; + return inst; +} + scf_instruction_t* x64_make_inst_I2M(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_variable_t* v_dst, scf_register_t* r_base, uint8_t* imm, int32_t size) { scf_register_t* rbp = x64_find_register("rbp"); @@ -445,6 +521,30 @@ scf_instruction_t* x64_make_inst_I2M(scf_rela_t** prela, scf_x64_OpCode_t* OpCod return inst; } +scf_instruction_t* x64_make_inst_G2L(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_register_t* r_src) +{ + if (OpCode->ModRM_OpCode_used) { + scf_loge("ModRM opcode invalid\n"); + return NULL; + } + + scf_instruction_t* inst = _x64_make_OpCode(OpCode, OpCode->OpBytes, r_src, NULL, NULL); + if (!inst) + return NULL; + + if (_x64_make_disp(prela, inst, r_src->id, -1, 0) < 0) { + free(inst); + return NULL; + } + + inst->src.base = r_src; + inst->dst.base = NULL; + inst->dst.disp = 0; + inst->dst.flag = 1; + + return inst; +} + scf_instruction_t* x64_make_inst_G2M(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_variable_t* v_dst, scf_register_t* r_base, scf_register_t* r_src) { if (OpCode->ModRM_OpCode_used) { @@ -499,6 +599,30 @@ scf_instruction_t* x64_make_inst_G2M(scf_rela_t** prela, scf_x64_OpCode_t* OpCod return inst; } +scf_instruction_t* x64_make_inst_L2G(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_register_t* r_dst) +{ + if (OpCode->ModRM_OpCode_used) { + scf_loge("ModRM opcode invalid\n"); + return NULL; + } + + scf_instruction_t* inst = _x64_make_OpCode(OpCode, r_dst->bytes, r_dst, NULL, NULL); + if (!inst) + return NULL; + + if (_x64_make_disp(prela, inst, r_dst->id, -1, 0) < 0) { + free(inst); + return NULL; + } + + inst->dst.base = r_dst; + inst->src.base = NULL; + inst->src.disp = 0; + inst->src.flag = 1; + + return 
inst; +} + scf_instruction_t* x64_make_inst_M2G(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, scf_register_t* r_base, scf_variable_t* v_src) { if (OpCode->ModRM_OpCode_used) { diff --git a/native/x64/scf_x64_opcode.c b/native/x64/scf_x64_opcode.c index 1b51082..d1aa505 100644 --- a/native/x64/scf_x64_opcode.c +++ b/native/x64/scf_x64_opcode.c @@ -22,6 +22,16 @@ scf_x64_OpCode_t x64_OpCodes[] = { {SCF_X64_XOR, "xor", 2, {0x33, 0x0, 0x0},1, 2,2, SCF_X64_E2G, 0,0, 0,{0,0}}, {SCF_X64_XOR, "xor", 2, {0x33, 0x0, 0x0},1, 4,4, SCF_X64_E2G, 0,0, 0,{0,0}}, {SCF_X64_XOR, "xor", 2, {0x33, 0x0, 0x0},1, 8,8, SCF_X64_E2G, 0,0, 0,{0,0}}, + + {SCF_X64_XOR, "xorb", 2, {0x30, 0x0, 0x0},1, 1,1, SCF_X64_G2E, 0,0, 0,{0,0}}, + {SCF_X64_XOR, "xorw", 2, {0x31, 0x0, 0x0},1, 2,2, SCF_X64_G2E, 0,0, 0,{0,0}}, + {SCF_X64_XOR, "xorl", 2, {0x31, 0x0, 0x0},1, 4,4, SCF_X64_G2E, 0,0, 0,{0,0}}, + {SCF_X64_XOR, "xorq", 2, {0x31, 0x0, 0x0},1, 8,8, SCF_X64_G2E, 0,0, 0,{0,0}}, + + {SCF_X64_XOR, "xorb", 2, {0x32, 0x0, 0x0},1, 1,1, SCF_X64_E2G, 0,0, 0,{0,0}}, + {SCF_X64_XOR, "xorw", 2, {0x33, 0x0, 0x0},1, 2,2, SCF_X64_E2G, 0,0, 0,{0,0}}, + {SCF_X64_XOR, "xorl", 2, {0x33, 0x0, 0x0},1, 4,4, SCF_X64_E2G, 0,0, 0,{0,0}}, + {SCF_X64_XOR, "xorq", 2, {0x33, 0x0, 0x0},1, 8,8, SCF_X64_E2G, 0,0, 0,{0,0}}, #if 0 {SCF_X64_XOR, "xor", 2, {0x34, 0x0, 0x0},1, 1,1, SCF_X64_I2G, 0,0, 1,{0,0}}, {SCF_X64_XOR, "xor", 2, {0x35, 0x0, 0x0},1, 2,2, SCF_X64_I2G, 0,0, 1,{0,0}}, @@ -161,6 +171,7 @@ scf_x64_OpCode_t x64_OpCodes[] = { {SCF_X64_NOT, "not", 2, {0xf7, 0x0, 0x0},1, 8,8, SCF_X64_E, 2,1, 0,{0,0}}, {SCF_X64_LEA, "lea", 1, {0x8d, 0x0, 0x0},1, 8,8, SCF_X64_E2G, 0,0, 0,{0,0}}, + {SCF_X64_LEA, "leaq", 1, {0x8d, 0x0, 0x0},1, 8,8, SCF_X64_E2G, 0,0, 0,{0,0}}, {SCF_X64_MOV, "mov", 2, {0x88, 0x0, 0x0},1, 1,1, SCF_X64_G2E, 0,0, 0,{0,0}}, {SCF_X64_MOV, "mov", 2, {0x89, 0x0, 0x0},1, 2,2, SCF_X64_G2E, 0,0, 0,{0,0}}, @@ -318,7 +329,7 @@ scf_x64_OpCode_t x64_OpCodes[] = { {SCF_X64_JMP, "jmp", 2, {0xff, 0x0, 0x0},1, 
8,8, SCF_X64_E, 4,1, 0,{0,0}}, }; -scf_x64_OpCode_t* x64_find_OpCode_by_type(const int type) +scf_x64_OpCode_t* x64_find_OpCode_by_type(const int type) { int i; for (i = 0; i < sizeof(x64_OpCodes) / sizeof(x64_OpCodes[0]); i++) { @@ -330,23 +341,19 @@ scf_x64_OpCode_t* x64_find_OpCode_by_type(const int type) return NULL; } -scf_x64_OpCode_t* x64_find_OpCode(const int type, const int OpBytes, const int RegBytes, const int EG) +scf_x64_OpCode_t* x64_find_OpCode_by_name(const char* name) { int i; for (i = 0; i < sizeof(x64_OpCodes) / sizeof(x64_OpCodes[0]); i++) { scf_x64_OpCode_t* OpCode = &(x64_OpCodes[i]); - - if (type == OpCode->type - && OpBytes == OpCode->OpBytes - && RegBytes == OpCode->RegBytes - && EG == OpCode->EG) + if (!strcmp(OpCode->name, name)) return OpCode; } return NULL; } -int x64_find_OpCodes(scf_vector_t* results, const int type, const int OpBytes, const int RegBytes, const int EG) +scf_x64_OpCode_t* x64_find_OpCode(const int type, const int OpBytes, const int RegBytes, const int EG) { int i; for (i = 0; i < sizeof(x64_OpCodes) / sizeof(x64_OpCodes[0]); i++) { @@ -356,13 +363,8 @@ int x64_find_OpCodes(scf_vector_t* results, const int type, const int OpBytes, c if (type == OpCode->type && OpBytes == OpCode->OpBytes && RegBytes == OpCode->RegBytes - && EG == OpCode->EG) { - - int ret = scf_vector_add(results, OpCode); - if (ret < 0) - return ret; - } + && EG == OpCode->EG) + return OpCode; } - return 0; + return NULL; } - diff --git a/native/x64/scf_x64_opcode.h b/native/x64/scf_x64_opcode.h index c471e2d..cef6d64 100644 --- a/native/x64/scf_x64_opcode.h +++ b/native/x64/scf_x64_opcode.h @@ -3,6 +3,7 @@ #include"scf_native.h" #include"scf_x64_util.h" +#include"scf_elf.h" typedef struct { int type; @@ -27,10 +28,90 @@ typedef struct { uint32_t regs[2]; } scf_x64_OpCode_t; -scf_x64_OpCode_t* x64_find_OpCode_by_type(const int type); -scf_x64_OpCode_t* x64_find_OpCode(const int type, const int OpBytes, const int RegBytes, const int EG); 
+scf_x64_OpCode_t* x64_find_OpCode (const int type, const int OpBytes, const int RegBytes, const int EG); +scf_x64_OpCode_t* x64_find_OpCode_by_type(const int type); +scf_x64_OpCode_t* x64_find_OpCode_by_name(const char* name); -int x64_find_OpCodes(scf_vector_t* results, const int type, const int OpBytes, const int RegBytes, const int EG); +scf_instruction_t* x64_make_inst (scf_x64_OpCode_t* OpCode, int size); +scf_instruction_t* x64_make_inst_G(scf_x64_OpCode_t* OpCode, scf_register_t* r); +scf_instruction_t* x64_make_inst_E(scf_x64_OpCode_t* OpCode, scf_register_t* r); +scf_instruction_t* x64_make_inst_I(scf_x64_OpCode_t* OpCode, uint8_t* imm, int size); +void x64_make_inst_I2(scf_instruction_t* inst, scf_x64_OpCode_t* OpCode, uint8_t* imm, int size); + +scf_instruction_t* x64_make_inst_I2G(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, uint8_t* imm, int size); +scf_instruction_t* x64_make_inst_I2E(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, uint8_t* imm, int size); + +scf_instruction_t* x64_make_inst_G2E(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, scf_register_t* r_src); +scf_instruction_t* x64_make_inst_E2G(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, scf_register_t* r_src); + +scf_instruction_t* x64_make_inst_P2G(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, scf_register_t* r_base, int32_t offset); +scf_instruction_t* x64_make_inst_G2P(scf_x64_OpCode_t* OpCode, scf_register_t* r_base, int32_t offset, scf_register_t* r_src); +scf_instruction_t* x64_make_inst_I2P(scf_x64_OpCode_t* OpCode, scf_register_t* r_base, int32_t offset, uint8_t* imm, int size); + +scf_instruction_t* x64_make_inst_SIB2G(scf_x64_OpCode_t* OpCode, scf_register_t* r_dst, scf_register_t* r_base, scf_register_t* r_index, int32_t scale, int32_t disp); +scf_instruction_t* x64_make_inst_G2SIB(scf_x64_OpCode_t* OpCode, scf_register_t* r_base, scf_register_t* r_index, int32_t scale, int32_t disp, scf_register_t* r_src); +scf_instruction_t* x64_make_inst_I2SIB(scf_x64_OpCode_t* 
OpCode, scf_register_t* r_base, scf_register_t* r_index, int32_t scale, int32_t disp, uint8_t* imm, int32_t size); + +scf_instruction_t* x64_make_inst_SIB(scf_x64_OpCode_t* OpCode, scf_register_t* r_base, scf_register_t* r_index, int32_t scale, int32_t disp, int size); +scf_instruction_t* x64_make_inst_P (scf_x64_OpCode_t* OpCode, scf_register_t* r_base, int32_t offset, int size); + +scf_instruction_t* x64_make_inst_L (scf_rela_t** prela, scf_x64_OpCode_t* OpCode); +scf_instruction_t* x64_make_inst_I2L(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, uint8_t* imm, int32_t size); +scf_instruction_t* x64_make_inst_G2L(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_register_t* r_src); +scf_instruction_t* x64_make_inst_L2G(scf_rela_t** prela, scf_x64_OpCode_t* OpCode, scf_register_t* r_dst); + +#define X64_INST_ADD_CHECK(vec, inst) \ + do { \ + if (!(inst)) { \ + scf_loge("\n"); \ + return -ENOMEM; \ + } \ + int ret = scf_vector_add((vec), (inst)); \ + if (ret < 0) { \ + scf_loge("\n"); \ + free(inst); \ + return ret; \ + } \ + } while (0) + +#define X64_RELA_ADD_CHECK(vec, rela, c, v, f) \ + do { \ + if (rela) { \ + (rela)->code = (c); \ + (rela)->var = (v); \ + (rela)->func = (f); \ + (rela)->inst = (c)->instructions->data[(c)->instructions->size - 1]; \ + (rela)->addend = -4; \ + (rela)->type = R_X86_64_PC32; \ + int ret = scf_vector_add((vec), (rela)); \ + if (ret < 0) { \ + scf_loge("\n"); \ + free(rela); \ + return ret; \ + } \ + } \ + } while (0) + +#define X64_RELA_ADD_LABEL(vec, rela, _inst, _label) \ + do { \ + if (rela) { \ + (rela)->inst = (_inst); \ + (rela)->addend = -4; \ + (rela)->type = R_X86_64_PC32; \ + (rela)->name = scf_string_clone(_label); \ + if (!(rela)->name) { \ + scf_loge("\n"); \ + scf_rela_free(rela); \ + return -ENOMEM; \ + } \ + int ret = scf_vector_add((vec), (rela)); \ + if (ret < 0) { \ + scf_loge("\n"); \ + scf_rela_free(rela); \ + return ret; \ + } \ + } \ + } while (0) #endif diff --git a/native/x64/scf_x64_reg.c 
b/native/x64/scf_x64_reg.c index 36b1bd7..a83fb6a 100644 --- a/native/x64/scf_x64_reg.c +++ b/native/x64/scf_x64_reg.c @@ -1,117 +1,5 @@ #include"scf_x64.h" - -scf_register_t x64_registers[] = { - - {0, 1, "al", X64_COLOR(0, 0, 0x1), NULL, 0}, - {0, 2, "ax", X64_COLOR(0, 0, 0x3), NULL, 0}, - {0, 4, "eax", X64_COLOR(0, 0, 0xf), NULL, 0}, - {0, 8, "rax", X64_COLOR(0, 0, 0xff), NULL, 0}, - - {1, 1, "cl", X64_COLOR(0, 1, 0x1), NULL, 0}, - {1, 2, "cx", X64_COLOR(0, 1, 0x3), NULL, 0}, - {1, 4, "ecx", X64_COLOR(0, 1, 0xf), NULL, 0}, - {1, 8, "rcx", X64_COLOR(0, 1, 0xff), NULL, 0}, - - {2, 1, "dl", X64_COLOR(0, 2, 0x1), NULL, 0}, - {2, 2, "dx", X64_COLOR(0, 2, 0x3), NULL, 0}, - {2, 4, "edx", X64_COLOR(0, 2, 0xf), NULL, 0}, - {2, 8, "rdx", X64_COLOR(0, 2, 0xff), NULL, 0}, - - {3, 1, "bl", X64_COLOR(0, 3, 0x1), NULL, 0}, - {3, 2, "bx", X64_COLOR(0, 3, 0x3), NULL, 0}, - {3, 4, "ebx", X64_COLOR(0, 3, 0xf), NULL, 0}, - {3, 8, "rbx", X64_COLOR(0, 3, 0xff), NULL, 0}, - - {4, 2, "sp", X64_COLOR(0, 4, 0x3), NULL, 0}, - {4, 4, "esp", X64_COLOR(0, 4, 0xf), NULL, 0}, - {4, 8, "rsp", X64_COLOR(0, 4, 0xff), NULL, 0}, - - {5, 2, "bp", X64_COLOR(0, 5, 0x3), NULL, 0}, - {5, 4, "ebp", X64_COLOR(0, 5, 0xf), NULL, 0}, - {5, 8, "rbp", X64_COLOR(0, 5, 0xff), NULL, 0}, - - {6, 1, "sil", X64_COLOR(0, 6, 0x1), NULL, 0}, - {6, 2, "si", X64_COLOR(0, 6, 0x3), NULL, 0}, - {6, 4, "esi", X64_COLOR(0, 6, 0xf), NULL, 0}, - {6, 8, "rsi", X64_COLOR(0, 6, 0xff), NULL, 0}, - - {7, 1, "dil", X64_COLOR(0, 7, 0x1), NULL, 0}, - {7, 2, "di", X64_COLOR(0, 7, 0x3), NULL, 0}, - {7, 4, "edi", X64_COLOR(0, 7, 0xf), NULL, 0}, - {7, 8, "rdi", X64_COLOR(0, 7, 0xff), NULL, 0}, - - {8, 1, "r8b", X64_COLOR(0, 8, 0x1), NULL, 0}, - {8, 2, "r8w", X64_COLOR(0, 8, 0x3), NULL, 0}, - {8, 4, "r8d", X64_COLOR(0, 8, 0xf), NULL, 0}, - {8, 8, "r8", X64_COLOR(0, 8, 0xff), NULL, 0}, - - {9, 1, "r9b", X64_COLOR(0, 9, 0x1), NULL, 0}, - {9, 2, "r9w", X64_COLOR(0, 9, 0x3), NULL, 0}, - {9, 4, "r9d", X64_COLOR(0, 9, 0xf), NULL, 0}, - {9, 8, 
"r9", X64_COLOR(0, 9, 0xff), NULL, 0}, - - {10, 1, "r10b", X64_COLOR(0, 10, 0x1), NULL, 0}, - {10, 2, "r10w", X64_COLOR(0, 10, 0x3), NULL, 0}, - {10, 4, "r10d", X64_COLOR(0, 10, 0xf), NULL, 0}, - {10, 8, "r10", X64_COLOR(0, 10, 0xff), NULL, 0}, - - {11, 1, "r11b", X64_COLOR(0, 11, 0x1), NULL, 0}, - {11, 2, "r11w", X64_COLOR(0, 11, 0x3), NULL, 0}, - {11, 4, "r11d", X64_COLOR(0, 11, 0xf), NULL, 0}, - {11, 8, "r11", X64_COLOR(0, 11, 0xff), NULL, 0}, - - {12, 1, "r12b", X64_COLOR(0, 12, 0x1), NULL, 0}, - {12, 2, "r12w", X64_COLOR(0, 12, 0x3), NULL, 0}, - {12, 4, "r12d", X64_COLOR(0, 12, 0xf), NULL, 0}, - {12, 8, "r12", X64_COLOR(0, 12, 0xff), NULL, 0}, - - {13, 1, "r13b", X64_COLOR(0, 13, 0x1), NULL, 0}, - {13, 2, "r13w", X64_COLOR(0, 13, 0x3), NULL, 0}, - {13, 4, "r13d", X64_COLOR(0, 13, 0xf), NULL, 0}, - {13, 8, "r13", X64_COLOR(0, 13, 0xff), NULL, 0}, - - {14, 1, "r14b", X64_COLOR(0, 14, 0x1), NULL, 0}, - {14, 2, "r14w", X64_COLOR(0, 14, 0x3), NULL, 0}, - {14, 4, "r14d", X64_COLOR(0, 14, 0xf), NULL, 0}, - {14, 8, "r14", X64_COLOR(0, 14, 0xff), NULL, 0}, - - {15, 1, "r15b", X64_COLOR(0, 15, 0x1), NULL, 0}, - {15, 2, "r15w", X64_COLOR(0, 15, 0x3), NULL, 0}, - {15, 4, "r15d", X64_COLOR(0, 15, 0xf), NULL, 0}, - {15, 8, "r15", X64_COLOR(0, 15, 0xff), NULL, 0}, - - {4, 1, "ah", X64_COLOR(0, 0, 0x2), NULL, 0}, - {5, 1, "ch", X64_COLOR(0, 1, 0x2), NULL, 0}, - {6, 1, "dh", X64_COLOR(0, 2, 0x2), NULL, 0}, - {7, 1, "bh", X64_COLOR(0, 3, 0x2), NULL, 0}, - - {0, 4, "mm0", X64_COLOR(1, 0, 0xf), NULL, 0}, - {0, 8, "xmm0", X64_COLOR(1, 0, 0xff), NULL, 0}, - - {1, 4, "mm1", X64_COLOR(1, 1, 0xf), NULL, 0}, - {1, 8, "xmm1", X64_COLOR(1, 1, 0xff), NULL, 0}, - - {2, 4, "mm2", X64_COLOR(1, 2, 0xf), NULL, 0}, - {2, 8, "xmm2", X64_COLOR(1, 2, 0xff), NULL, 0}, - - {3, 4, "mm3", X64_COLOR(1, 3, 0xf), NULL, 0}, - {3, 8, "xmm3", X64_COLOR(1, 3, 0xff), NULL, 0}, - - {4, 4, "mm4", X64_COLOR(1, 4, 0xf), NULL, 0}, - {4, 8, "xmm4", X64_COLOR(1, 4, 0xff), NULL, 0}, - - {5, 4, "mm5", X64_COLOR(1, 5, 
0xf), NULL, 0}, - {5, 8, "xmm5", X64_COLOR(1, 5, 0xff), NULL, 0}, - - {6, 4, "mm6", X64_COLOR(1, 6, 0xf), NULL, 0}, - {6, 8, "xmm6", X64_COLOR(1, 6, 0xff), NULL, 0}, - - {7, 4, "mm7", X64_COLOR(1, 7, 0xf), NULL, 0}, - {7, 8, "xmm7", X64_COLOR(1, 7, 0xff), NULL, 0}, - - - {0xf, 8, "rip", X64_COLOR(0, 7, 0xff), NULL, 0}, -}; +#include"scf_x64_reg_util.c" int x64_reg_cached_vars(scf_register_t* r) { @@ -416,58 +304,6 @@ int x64_registers_reset() return 0; } -scf_register_t* x64_find_register(const char* name) -{ - int i; - for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) { - - scf_register_t* r = &(x64_registers[i]); - - if (!strcmp(r->name, name)) - return r; - } - return NULL; -} - -scf_register_t* x64_find_register_type_id_bytes(uint32_t type, uint32_t id, int bytes) -{ - int i; - for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) { - - scf_register_t* r = &(x64_registers[i]); - - if (X64_COLOR_TYPE(r->color) == type && r->id == id && r->bytes == bytes) - return r; - } - return NULL; -} - -scf_register_t* x64_find_register_color(intptr_t color) -{ - int i; - for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) { - - scf_register_t* r = &(x64_registers[i]); - - if (r->color == color) - return r; - } - return NULL; -} - -scf_register_t* x64_find_register_color_bytes(intptr_t color, int bytes) -{ - int i; - for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) { - - scf_register_t* r = &(x64_registers[i]); - - if (X64_COLOR_CONFLICT(r->color, color) && r->bytes == bytes) - return r; - } - return NULL; -} - scf_vector_t* x64_register_colors() { scf_vector_t* colors = scf_vector_alloc(); diff --git a/native/x64/scf_x64_reg.h b/native/x64/scf_x64_reg.h index 40c5967..0e59579 100644 --- a/native/x64/scf_x64_reg.h +++ b/native/x64/scf_x64_reg.h @@ -1,25 +1,7 @@ #ifndef SCF_X64_REG_H #define SCF_X64_REG_H -#include"scf_native.h" -#include"scf_x64_util.h" - -#define X64_COLOR(type, id, mask) ((type) 
<< 24 | (id) << 16 | (mask)) -#define X64_COLOR_TYPE(c) ((c) >> 24) -#define X64_COLOR_ID(c) (((c) >> 16) & 0xff) -#define X64_COLOR_MASK(c) ((c) & 0xffff) -#define X64_COLOR_CONFLICT(c0, c1) ( (c0) >> 16 == (c1) >> 16 && (c0) & (c1) & 0xffff ) - -#define X64_COLOR_BYTES(c) \ - ({ \ - int n = 0;\ - intptr_t minor = (c) & 0xffff; \ - while (minor) { \ - minor &= minor - 1; \ - n++;\ - } \ - n;\ - }) +#include"scf_x64_reg_util.h" #define X64_SELECT_REG_CHECK(pr, dn, c, f, load_flag) \ do {\ @@ -98,15 +80,6 @@ static const char* x64_abi_callee_saves[] = }; #define X64_ABI_CALLEE_SAVES_NB (sizeof(x64_abi_callee_saves) / sizeof(x64_abi_callee_saves[0])) -typedef struct { - scf_register_t* base; - scf_register_t* index; - - int32_t scale; - int32_t disp; - int32_t size; -} x64_sib_t; - static inline int x64_variable_size(scf_variable_t* v) { if (v->nb_dimentions > 0) @@ -126,14 +99,6 @@ void x64_registers_clear(); void x64_registers_print(); scf_vector_t* x64_register_colors(); -scf_register_t* x64_find_register(const char* name); - -scf_register_t* x64_find_register_type_id_bytes(uint32_t type, uint32_t id, int bytes); - -scf_register_t* x64_find_register_color(intptr_t color); - -scf_register_t* x64_find_register_color_bytes(intptr_t color, int bytes); - scf_register_t* x64_find_abi_register(int index, int bytes); scf_register_t* x64_select_overflowed_reg(scf_dag_node_t* dn, scf_3ac_code_t* c); @@ -186,48 +151,4 @@ static inline int x64_reg_is_retval(scf_register_t* r) return 0; } -static inline int x64_inst_data_is_reg(scf_inst_data_t* id) -{ - scf_register_t* rsp = x64_find_register("rsp"); - scf_register_t* rbp = x64_find_register("rbp"); - - if (!id->flag && id->base && id->base != rsp && id->base != rbp && 0 == id->imm_size) - return 1; - return 0; -} - -static inline int x64_inst_data_is_local(scf_inst_data_t* id) -{ - scf_register_t* rbp = x64_find_register("rbp"); - scf_register_t* rsp = x64_find_register("rsp"); - - if (id->flag && (id->base == rbp || id->base 
== rsp)) - return 1; - return 0; -} - -static inline int x64_inst_data_is_global(scf_inst_data_t* id) -{ - if (id->flag && !id->base) - return 1; - return 0; -} - -static inline int x64_inst_data_is_const(scf_inst_data_t* id) -{ - if (!id->flag && id->imm_size > 0) - return 1; - return 0; -} - -static inline int x64_inst_data_is_pointer(scf_inst_data_t* id) -{ - scf_register_t* rbp = x64_find_register("rbp"); - scf_register_t* rsp = x64_find_register("rsp"); - - if (id->flag && id->base && id->base != rbp && id->base != rsp) - return 1; - return 0; -} - #endif diff --git a/native/x64/scf_x64_reg_util.c b/native/x64/scf_x64_reg_util.c new file mode 100644 index 0000000..36556bd --- /dev/null +++ b/native/x64/scf_x64_reg_util.c @@ -0,0 +1,166 @@ +#include"scf_x64.h" + +scf_register_t x64_registers[] = { + + {0, 1, "al", X64_COLOR(0, 0, 0x1), NULL, 0}, + {0, 2, "ax", X64_COLOR(0, 0, 0x3), NULL, 0}, + {0, 4, "eax", X64_COLOR(0, 0, 0xf), NULL, 0}, + {0, 8, "rax", X64_COLOR(0, 0, 0xff), NULL, 0}, + + {1, 1, "cl", X64_COLOR(0, 1, 0x1), NULL, 0}, + {1, 2, "cx", X64_COLOR(0, 1, 0x3), NULL, 0}, + {1, 4, "ecx", X64_COLOR(0, 1, 0xf), NULL, 0}, + {1, 8, "rcx", X64_COLOR(0, 1, 0xff), NULL, 0}, + + {2, 1, "dl", X64_COLOR(0, 2, 0x1), NULL, 0}, + {2, 2, "dx", X64_COLOR(0, 2, 0x3), NULL, 0}, + {2, 4, "edx", X64_COLOR(0, 2, 0xf), NULL, 0}, + {2, 8, "rdx", X64_COLOR(0, 2, 0xff), NULL, 0}, + + {3, 1, "bl", X64_COLOR(0, 3, 0x1), NULL, 0}, + {3, 2, "bx", X64_COLOR(0, 3, 0x3), NULL, 0}, + {3, 4, "ebx", X64_COLOR(0, 3, 0xf), NULL, 0}, + {3, 8, "rbx", X64_COLOR(0, 3, 0xff), NULL, 0}, + + {4, 2, "sp", X64_COLOR(0, 4, 0x3), NULL, 0}, + {4, 4, "esp", X64_COLOR(0, 4, 0xf), NULL, 0}, + {4, 8, "rsp", X64_COLOR(0, 4, 0xff), NULL, 0}, + + {5, 2, "bp", X64_COLOR(0, 5, 0x3), NULL, 0}, + {5, 4, "ebp", X64_COLOR(0, 5, 0xf), NULL, 0}, + {5, 8, "rbp", X64_COLOR(0, 5, 0xff), NULL, 0}, + + {6, 1, "sil", X64_COLOR(0, 6, 0x1), NULL, 0}, + {6, 2, "si", X64_COLOR(0, 6, 0x3), NULL, 0}, + {6, 4, "esi", 
X64_COLOR(0, 6, 0xf), NULL, 0}, + {6, 8, "rsi", X64_COLOR(0, 6, 0xff), NULL, 0}, + + {7, 1, "dil", X64_COLOR(0, 7, 0x1), NULL, 0}, + {7, 2, "di", X64_COLOR(0, 7, 0x3), NULL, 0}, + {7, 4, "edi", X64_COLOR(0, 7, 0xf), NULL, 0}, + {7, 8, "rdi", X64_COLOR(0, 7, 0xff), NULL, 0}, + + {8, 1, "r8b", X64_COLOR(0, 8, 0x1), NULL, 0}, + {8, 2, "r8w", X64_COLOR(0, 8, 0x3), NULL, 0}, + {8, 4, "r8d", X64_COLOR(0, 8, 0xf), NULL, 0}, + {8, 8, "r8", X64_COLOR(0, 8, 0xff), NULL, 0}, + + {9, 1, "r9b", X64_COLOR(0, 9, 0x1), NULL, 0}, + {9, 2, "r9w", X64_COLOR(0, 9, 0x3), NULL, 0}, + {9, 4, "r9d", X64_COLOR(0, 9, 0xf), NULL, 0}, + {9, 8, "r9", X64_COLOR(0, 9, 0xff), NULL, 0}, + + {10, 1, "r10b", X64_COLOR(0, 10, 0x1), NULL, 0}, + {10, 2, "r10w", X64_COLOR(0, 10, 0x3), NULL, 0}, + {10, 4, "r10d", X64_COLOR(0, 10, 0xf), NULL, 0}, + {10, 8, "r10", X64_COLOR(0, 10, 0xff), NULL, 0}, + + {11, 1, "r11b", X64_COLOR(0, 11, 0x1), NULL, 0}, + {11, 2, "r11w", X64_COLOR(0, 11, 0x3), NULL, 0}, + {11, 4, "r11d", X64_COLOR(0, 11, 0xf), NULL, 0}, + {11, 8, "r11", X64_COLOR(0, 11, 0xff), NULL, 0}, + + {12, 1, "r12b", X64_COLOR(0, 12, 0x1), NULL, 0}, + {12, 2, "r12w", X64_COLOR(0, 12, 0x3), NULL, 0}, + {12, 4, "r12d", X64_COLOR(0, 12, 0xf), NULL, 0}, + {12, 8, "r12", X64_COLOR(0, 12, 0xff), NULL, 0}, + + {13, 1, "r13b", X64_COLOR(0, 13, 0x1), NULL, 0}, + {13, 2, "r13w", X64_COLOR(0, 13, 0x3), NULL, 0}, + {13, 4, "r13d", X64_COLOR(0, 13, 0xf), NULL, 0}, + {13, 8, "r13", X64_COLOR(0, 13, 0xff), NULL, 0}, + + {14, 1, "r14b", X64_COLOR(0, 14, 0x1), NULL, 0}, + {14, 2, "r14w", X64_COLOR(0, 14, 0x3), NULL, 0}, + {14, 4, "r14d", X64_COLOR(0, 14, 0xf), NULL, 0}, + {14, 8, "r14", X64_COLOR(0, 14, 0xff), NULL, 0}, + + {15, 1, "r15b", X64_COLOR(0, 15, 0x1), NULL, 0}, + {15, 2, "r15w", X64_COLOR(0, 15, 0x3), NULL, 0}, + {15, 4, "r15d", X64_COLOR(0, 15, 0xf), NULL, 0}, + {15, 8, "r15", X64_COLOR(0, 15, 0xff), NULL, 0}, + + {4, 1, "ah", X64_COLOR(0, 0, 0x2), NULL, 0}, + {5, 1, "ch", X64_COLOR(0, 1, 0x2), NULL, 0}, + 
{6, 1, "dh", X64_COLOR(0, 2, 0x2), NULL, 0}, + {7, 1, "bh", X64_COLOR(0, 3, 0x2), NULL, 0}, + + {0, 4, "mm0", X64_COLOR(1, 0, 0xf), NULL, 0}, + {0, 8, "xmm0", X64_COLOR(1, 0, 0xff), NULL, 0}, + + {1, 4, "mm1", X64_COLOR(1, 1, 0xf), NULL, 0}, + {1, 8, "xmm1", X64_COLOR(1, 1, 0xff), NULL, 0}, + + {2, 4, "mm2", X64_COLOR(1, 2, 0xf), NULL, 0}, + {2, 8, "xmm2", X64_COLOR(1, 2, 0xff), NULL, 0}, + + {3, 4, "mm3", X64_COLOR(1, 3, 0xf), NULL, 0}, + {3, 8, "xmm3", X64_COLOR(1, 3, 0xff), NULL, 0}, + + {4, 4, "mm4", X64_COLOR(1, 4, 0xf), NULL, 0}, + {4, 8, "xmm4", X64_COLOR(1, 4, 0xff), NULL, 0}, + + {5, 4, "mm5", X64_COLOR(1, 5, 0xf), NULL, 0}, + {5, 8, "xmm5", X64_COLOR(1, 5, 0xff), NULL, 0}, + + {6, 4, "mm6", X64_COLOR(1, 6, 0xf), NULL, 0}, + {6, 8, "xmm6", X64_COLOR(1, 6, 0xff), NULL, 0}, + + {7, 4, "mm7", X64_COLOR(1, 7, 0xf), NULL, 0}, + {7, 8, "xmm7", X64_COLOR(1, 7, 0xff), NULL, 0}, + + + {0xf, 8, "rip", X64_COLOR(0, 7, 0xff), NULL, 0}, +}; + +scf_register_t* x64_find_register(const char* name) +{ + int i; + for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) { + + scf_register_t* r = &(x64_registers[i]); + + if (!strcmp(r->name, name)) + return r; + } + return NULL; +} + +scf_register_t* x64_find_register_type_id_bytes(uint32_t type, uint32_t id, int bytes) +{ + int i; + for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) { + + scf_register_t* r = &(x64_registers[i]); + + if (X64_COLOR_TYPE(r->color) == type && r->id == id && r->bytes == bytes) + return r; + } + return NULL; +} + +scf_register_t* x64_find_register_color(intptr_t color) +{ + int i; + for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) { + + scf_register_t* r = &(x64_registers[i]); + + if (r->color == color) + return r; + } + return NULL; +} + +scf_register_t* x64_find_register_color_bytes(intptr_t color, int bytes) +{ + int i; + for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) { + + scf_register_t* r = &(x64_registers[i]); + 
+ if (X64_COLOR_CONFLICT(r->color, color) && r->bytes == bytes) + return r; + } + return NULL; +} diff --git a/native/x64/scf_x64_reg_util.h b/native/x64/scf_x64_reg_util.h new file mode 100644 index 0000000..1f23aaf --- /dev/null +++ b/native/x64/scf_x64_reg_util.h @@ -0,0 +1,85 @@ +#ifndef SCF_X64_REG_UTIL_H +#define SCF_X64_REG_UTIL_H + +#include"scf_native.h" +#include"scf_x64_util.h" + +#define X64_COLOR(type, id, mask) ((type) << 24 | (id) << 16 | (mask)) +#define X64_COLOR_TYPE(c) ((c) >> 24) +#define X64_COLOR_ID(c) (((c) >> 16) & 0xff) +#define X64_COLOR_MASK(c) ((c) & 0xffff) +#define X64_COLOR_CONFLICT(c0, c1) ( (c0) >> 16 == (c1) >> 16 && (c0) & (c1) & 0xffff ) + +#define X64_COLOR_BYTES(c) \ + ({ \ + int n = 0;\ + intptr_t minor = (c) & 0xffff; \ + while (minor) { \ + minor &= minor - 1; \ + n++;\ + } \ + n;\ + }) + +typedef struct { + scf_register_t* base; + scf_register_t* index; + + int32_t scale; + int32_t disp; + int32_t size; +} x64_sib_t; + +scf_register_t* x64_find_register(const char* name); + +scf_register_t* x64_find_register_type_id_bytes(uint32_t type, uint32_t id, int bytes); + +scf_register_t* x64_find_register_color(intptr_t color); + +scf_register_t* x64_find_register_color_bytes(intptr_t color, int bytes); + +static inline int x64_inst_data_is_reg(scf_inst_data_t* id) +{ + scf_register_t* rsp = x64_find_register("rsp"); + scf_register_t* rbp = x64_find_register("rbp"); + + if (!id->flag && id->base && id->base != rsp && id->base != rbp && 0 == id->imm_size) + return 1; + return 0; +} + +static inline int x64_inst_data_is_local(scf_inst_data_t* id) +{ + scf_register_t* rbp = x64_find_register("rbp"); + scf_register_t* rsp = x64_find_register("rsp"); + + if (id->flag && (id->base == rbp || id->base == rsp)) + return 1; + return 0; +} + +static inline int x64_inst_data_is_global(scf_inst_data_t* id) +{ + if (id->flag && !id->base) + return 1; + return 0; +} + +static inline int x64_inst_data_is_const(scf_inst_data_t* id) +{ + if 
(!id->flag && id->imm_size > 0) + return 1; + return 0; +} + +static inline int x64_inst_data_is_pointer(scf_inst_data_t* id) +{ + scf_register_t* rbp = x64_find_register("rbp"); + scf_register_t* rsp = x64_find_register("rsp"); + + if (id->flag && id->base && id->base != rbp && id->base != rsp) + return 1; + return 0; +} + +#endif diff --git a/native/x64/scf_x64_util.h b/native/x64/scf_x64_util.h index 837c30a..da4fc06 100644 --- a/native/x64/scf_x64_util.h +++ b/native/x64/scf_x64_util.h @@ -325,4 +325,3 @@ static inline void scf_SIB_setBase(uint8_t* SIB, uint8_t base) } #endif - diff --git a/parse/Makefile b/parse/Makefile index fdc5dcc..787673c 100644 --- a/parse/Makefile +++ b/parse/Makefile @@ -5,12 +5,14 @@ CFILES += ../lex/scf_lex_util.c CFILES += scf_parse_util.c CFILES += scf_parse.c +CFILES += scf_symtab.c CFILES += main.c CFILES += scf_operator_handler_semantic.c CFILES += scf_operator_handler_expr.c CFILES += scf_operator_handler_const.c CFILES += ../native/scf_native.c +CFILES += ../native/scf_instruction.c CFILES += ../native/x64/scf_x64.c CFILES += ../native/x64/scf_x64_inst.c CFILES += ../native/x64/scf_x64_inst_util.c diff --git a/parse/main.c b/parse/main.c index c4c5324..a4bc589 100644 --- a/parse/main.c +++ b/parse/main.c @@ -204,24 +204,6 @@ int main(int argc, char* argv[]) printf("\n"); - scf_parse_t* parse = NULL; - - if (scf_parse_open(&parse) < 0) { - scf_loge("\n"); - return -1; - } - - for (i = 0; i < srcs->size; i++) { - char* file = srcs->data[i]; - - assert(file); - - if (scf_parse_file(parse, file) < 0) { - scf_loge("parse file '%s' failed\n", file); - return -1; - } - } - char* obj = "1.o"; char* exec = "1.out"; @@ -232,17 +214,37 @@ int main(int argc, char* argv[]) exec = out; } - if (scf_parse_compile(parse, arch, _3ac) < 0) { - scf_loge("\n"); - return -1; - } + if (srcs->size > 0) { + scf_parse_t* parse = NULL; - if (scf_parse_to_obj(parse, obj, arch) < 0) { - scf_loge("\n"); - return -1; - } + if (scf_parse_open(&parse) < 0) { 
+ scf_loge("\n"); + return -1; + } + + for (i = 0; i < srcs->size; i++) { + char* file = srcs->data[i]; + + assert(file); + + if (scf_parse_file(parse, file) < 0) { + scf_loge("parse file '%s' failed\n", file); + return -1; + } + } + + if (scf_parse_compile(parse, arch, _3ac) < 0) { + scf_loge("\n"); + return -1; + } - scf_parse_close(parse); + if (scf_parse_to_obj(parse, obj, arch) < 0) { + scf_loge("\n"); + return -1; + } + + scf_parse_close(parse); + } if (!link) { printf("%s(),%d, main ok\n", __func__, __LINE__); @@ -271,10 +273,11 @@ int main(int argc, char* argv[]) else MAIN_ADD_FILES(__objs, __sofiles, "x64"); - - if (scf_vector_add(objs, obj) < 0) { - scf_loge("\n"); - return -1; + if (srcs->size > 0) { + if (scf_vector_add(objs, obj) < 0) { + scf_loge("\n"); + return -1; + } } if (scf_elf_link(objs, afiles, sofiles, sysroot, arch, exec, dyn) < 0) { diff --git a/parse/scf_dfa.c b/parse/scf_dfa.c index e10711b..ded7114 100644 --- a/parse/scf_dfa.c +++ b/parse/scf_dfa.c @@ -2,15 +2,6 @@ #include"scf_lex_word.h" #include -extern scf_dfa_ops_t dfa_ops_parse; - -static scf_dfa_ops_t* dfa_ops_array[] = -{ - &dfa_ops_parse, - - NULL, -}; - static int _scf_dfa_node_parse_word(scf_dfa_t* dfa, scf_dfa_node_t* node, scf_vector_t* words, void* data, int pre_hook_flag); void scf_dfa_del_hook_by_name(scf_dfa_hook_t** pp, const char* name) @@ -157,25 +148,9 @@ int scf_dfa_node_add_child(scf_dfa_node_t* parent, scf_dfa_node_t* child) return 0; } -int scf_dfa_open(scf_dfa_t** pdfa, const char* name, void* priv) +int scf_dfa_open(scf_dfa_t** pdfa, scf_dfa_ops_t* ops, void* priv) { - if (!pdfa || !name) { - scf_loge("\n"); - return -1; - } - - scf_dfa_ops_t* ops = NULL; - - int i; - for (i = 0; dfa_ops_array[i]; i++) { - ops = dfa_ops_array[i]; - - if (!strcmp(name, ops->name)) - break; - ops = NULL; - } - - if (!ops) { + if (!pdfa || !ops) { scf_loge("\n"); return -1; } diff --git a/parse/scf_dfa.h b/parse/scf_dfa.h index 6dc8fc5..ddd68a1 100644 --- a/parse/scf_dfa.h +++ 
b/parse/scf_dfa.h @@ -23,8 +23,8 @@ typedef struct scf_dfa_ops_s scf_dfa_ops_t; typedef struct scf_dfa_module_s scf_dfa_module_t; typedef struct scf_dfa_hook_s scf_dfa_hook_t; -typedef int (*scf_dfa_is_pt)( scf_dfa_t* dfa, void* word); -typedef int (*scf_dfa_action_pt)(scf_dfa_t* dfa, scf_vector_t* words, void* data); +typedef int (*scf_dfa_is_pt )(scf_dfa_t* dfa, void* word); +typedef int (*scf_dfa_action_pt)(scf_dfa_t* dfa, scf_vector_t* words, void* data); enum scf_dfa_hook_types { @@ -106,9 +106,9 @@ static inline int scf_dfa_action_next(scf_dfa_t* dfa, scf_vector_t* words, void* #define SCF_DFA_MODULE_NODE(dfa, module, node, is, action) \ { \ - char str[256]; \ - snprintf(str, sizeof(str) - 1, "%s_%s", dfa_module_##module.name, #node); \ - scf_dfa_node_t* node = scf_dfa_node_alloc(str, is, action); \ + char __str[256]; \ + snprintf(__str, sizeof(__str) - 1, "%s_%s", dfa_module_##module.name, #node); \ + scf_dfa_node_t* node = scf_dfa_node_alloc(__str, is, action); \ if (!node) { \ printf("%s(),%d, error: \n", __func__, __LINE__); \ return -1; \ @@ -148,19 +148,17 @@ static inline int scf_dfa_action_next(scf_dfa_t* dfa, scf_vector_t* words, void* scf_dfa_node_t* scf_dfa_node_alloc(const char* name, scf_dfa_is_pt is, scf_dfa_action_pt action); void scf_dfa_node_free(scf_dfa_node_t* node); -int scf_dfa_open(scf_dfa_t** pdfa, const char* name, void* priv); -void scf_dfa_close(scf_dfa_t* dfa); +int scf_dfa_open (scf_dfa_t** pdfa, scf_dfa_ops_t* ops, void* priv); +void scf_dfa_close(scf_dfa_t* dfa); -int scf_dfa_add_node(scf_dfa_t* dfa, scf_dfa_node_t* node); +int scf_dfa_add_node (scf_dfa_t* dfa, scf_dfa_node_t* node); scf_dfa_node_t* scf_dfa_find_node(scf_dfa_t* dfa, const char* name); int scf_dfa_node_add_child(scf_dfa_node_t* parent, scf_dfa_node_t* child); int scf_dfa_parse_word(scf_dfa_t* dfa, void* word, void* data); -void scf_dfa_del_hook( scf_dfa_hook_t** pp, scf_dfa_hook_t* sentinel); +void scf_dfa_del_hook (scf_dfa_hook_t** pp, scf_dfa_hook_t* sentinel); 
void scf_dfa_del_hook_by_name(scf_dfa_hook_t** pp, const char* name); - #endif - diff --git a/parse/scf_dfa_parse.c b/parse/scf_dfa_parse.c index 21dfc9b..aa94ddd 100644 --- a/parse/scf_dfa_parse.c +++ b/parse/scf_dfa_parse.c @@ -84,9 +84,42 @@ scf_dfa_module_t* dfa_modules[] = &dfa_module_block, }; +static void* dfa_pop_word(scf_dfa_t* dfa) +{ + scf_parse_t* parse = dfa->priv; + + scf_lex_word_t* w = NULL; + scf_lex_pop_word(parse->lex, &w); + return w; +} + +static int dfa_push_word(scf_dfa_t* dfa, void* word) +{ + scf_parse_t* parse = dfa->priv; + + scf_lex_word_t* w = word; + scf_lex_push_word(parse->lex, w); + return 0; +} + +static void dfa_free_word(void* word) +{ + scf_lex_word_t* w = word; + scf_lex_word_free(w); +} + +scf_dfa_ops_t dfa_ops_parse = +{ + .name = "parse", + + .pop_word = dfa_pop_word, + .push_word = dfa_push_word, + .free_word = dfa_free_word, +}; + int scf_parse_dfa_init(scf_parse_t* parse) { - if (scf_dfa_open(&parse->dfa, "parse", parse) < 0) { + if (scf_dfa_open(&parse->dfa, &dfa_ops_parse, parse) < 0) { scf_loge("\n"); return -1; } @@ -145,36 +178,3 @@ int scf_parse_dfa_init(scf_parse_t* parse) return 0; } - -static void* dfa_pop_word(scf_dfa_t* dfa) -{ - scf_parse_t* parse = dfa->priv; - - scf_lex_word_t* w = NULL; - scf_lex_pop_word(parse->lex, &w); - return w; -} - -static int dfa_push_word(scf_dfa_t* dfa, void* word) -{ - scf_parse_t* parse = dfa->priv; - - scf_lex_word_t* w = word; - scf_lex_push_word(parse->lex, w); - return 0; -} - -static void dfa_free_word(void* word) -{ - scf_lex_word_t* w = word; - scf_lex_word_free(w); -} - -scf_dfa_ops_t dfa_ops_parse = -{ - .name = "parse", - - .pop_word = dfa_pop_word, - .push_word = dfa_push_word, - .free_word = dfa_free_word, -}; diff --git a/parse/scf_parse.c b/parse/scf_parse.c index 8ed0aa6..d4cca22 100644 --- a/parse/scf_parse.c +++ b/parse/scf_parse.c @@ -5,19 +5,11 @@ #include"scf_dfa.h" #include"scf_basic_block.h" #include"scf_optimizer.h" +#include"scf_symtab.h" 
#include"scf_elf.h" #include"scf_leb128.h" #include"scf_eda.h" -#define ADD_SECTION_SYMBOL(sh_index, sh_name) \ - do { \ - int ret = _scf_parse_add_sym(parse, sh_name, 0, 0, sh_index, ELF64_ST_INFO(STB_LOCAL, STT_SECTION)); \ - if (ret < 0) { \ - scf_loge("\n"); \ - return ret; \ - } \ - } while (0) - scf_base_type_t base_types[] = { {SCF_VAR_CHAR, "char", 1}, @@ -108,58 +100,6 @@ int scf_parse_close(scf_parse_t* parse) return 0; } -static int _find_sym(const void* v0, const void* v1) -{ - const char* name = v0; - const scf_elf_sym_t* sym = v1; - - if (!sym->name) - return -1; - - return strcmp(name, sym->name); -} - -static int _scf_parse_add_sym(scf_parse_t* parse, const char* name, - uint64_t st_size, Elf64_Addr st_value, - uint16_t st_shndx, uint8_t st_info) -{ - scf_elf_sym_t* sym = NULL; - scf_elf_sym_t* sym2 = NULL; - - if (name) - sym = scf_vector_find_cmp(parse->symtab, name, _find_sym); - - if (!sym) { - sym = calloc(1, sizeof(scf_elf_sym_t)); - if (!sym) - return -ENOMEM; - - if (name) { - sym->name = strdup(name); - if (!sym->name) { - free(sym); - return -ENOMEM; - } - } - - sym->st_size = st_size; - sym->st_value = st_value; - sym->st_shndx = st_shndx; - sym->st_info = st_info; - - int ret = scf_vector_add(parse->symtab, sym); - if (ret < 0) { - if (sym->name) - free(sym->name); - free(sym); - scf_loge("\n"); - return ret; - } - } - - return 0; -} - int scf_parse_file(scf_parse_t* parse, const char* path) { if (!parse || !path) @@ -179,7 +119,7 @@ int scf_parse_file(scf_parse_t* parse, const char* path) return 0; } - if (scf_lex_open(&parse->lex, path) < 0) + if (scf_lex_open(&parse->lex, path, NULL) < 0) return -1; scf_ast_add_file_block(parse->ast, path); @@ -1380,52 +1320,6 @@ static int _fill_function_inst(scf_string_t* code, scf_function_t* f, int64_t of return 0; } -static int _scf_parse_add_rela(scf_vector_t* relas, scf_parse_t* parse, scf_rela_t* r, const char* name, uint16_t st_shndx) -{ - scf_elf_rela_t* rela; - - int ret; - int i; - - for 
(i = 0; i < parse->symtab->size; i++) { - scf_elf_sym_t* sym = parse->symtab->data[i]; - - if (!sym->name) - continue; - - if (!strcmp(name, sym->name)) - break; - } - - if (i == parse->symtab->size) { - ret = _scf_parse_add_sym(parse, name, 0, 0, st_shndx, ELF64_ST_INFO(STB_GLOBAL, STT_NOTYPE)); - if (ret < 0) { - scf_loge("\n"); - return ret; - } - } - - scf_logd("rela: %s, offset: %ld\n", name, r->text_offset); - - rela = calloc(1, sizeof(scf_elf_rela_t)); - if (!rela) - return -ENOMEM; - - rela->name = (char*)name; - rela->r_offset = r->text_offset; - rela->r_info = ELF64_R_INFO(i + 1, r->type); - rela->r_addend = r->addend; - - ret = scf_vector_add(relas, rela); - if (ret < 0) { - scf_loge("\n"); - free(rela); - return ret; - } - - return 0; -} - static int _fill_data(scf_parse_t* parse, scf_variable_t* v, scf_string_t* data, uint32_t shndx) { char* name; @@ -1476,7 +1370,7 @@ static int _fill_data(scf_parse_t* parse, scf_variable_t* v, scf_string_t* data, else stb = STB_GLOBAL; - ret = _scf_parse_add_sym(parse, name, size, data->len, shndx, ELF64_ST_INFO(stb, STT_OBJECT)); + ret = scf_symtab_add_sym(parse->symtab, name, size, data->len, shndx, ELF64_ST_INFO(stb, STT_OBJECT)); if (ret < 0) return ret; @@ -1606,7 +1500,7 @@ static int _scf_parse_add_data_relas(scf_parse_t* parse, scf_elf_context_t* elf) } if (j == parse->symtab->size) { - ret = _scf_parse_add_sym(parse, name, 0, 0, 0, ELF64_ST_INFO(STB_GLOBAL, STT_NOTYPE)); + ret = scf_symtab_add_sym(parse->symtab, name, 0, 0, 0, ELF64_ST_INFO(STB_GLOBAL, STT_NOTYPE)); if (ret < 0) { scf_loge("\n"); return ret; @@ -1851,11 +1745,10 @@ static int _add_debug_sections(scf_parse_t* parse, scf_elf_context_t* elf) if (str < 0) return str; - ADD_SECTION_SYMBOL(abbrev, ".debug_abbrev"); - ADD_SECTION_SYMBOL(info, ".debug_info"); - ADD_SECTION_SYMBOL(line, ".debug_line"); - ADD_SECTION_SYMBOL(str, ".debug_str"); - + ADD_SECTION_SYMBOL(parse->symtab, abbrev, ".debug_abbrev"); + ADD_SECTION_SYMBOL(parse->symtab, info, 
".debug_info"); + ADD_SECTION_SYMBOL(parse->symtab, line, ".debug_line"); + ADD_SECTION_SYMBOL(parse->symtab, str, ".debug_str"); return 0; } @@ -1943,9 +1836,9 @@ static int _scf_parse_add_text_relas(scf_parse_t* parse, scf_elf_context_t* elf, } if (r->func->node.define_flag) - ret = _scf_parse_add_rela(relas, parse, r, r->func->signature->data, SCF_SHNDX_TEXT); + ret = scf_symtab_add_rela(relas, parse->symtab, r, r->func->signature->data, SCF_SHNDX_TEXT); else - ret = _scf_parse_add_rela(relas, parse, r, r->func->signature->data, 0); + ret = scf_symtab_add_rela(relas, parse->symtab, r, r->func->signature->data, 0); if (ret < 0) { scf_loge("\n"); @@ -1962,7 +1855,7 @@ static int _scf_parse_add_text_relas(scf_parse_t* parse, scf_elf_context_t* elf, else name = r->var->signature->data; - ret = _scf_parse_add_rela(relas, parse, r, name, 2); + ret = scf_symtab_add_rela(relas, parse->symtab, r, name, 2); if (ret < 0) { scf_loge("\n"); goto error; @@ -2003,19 +1896,6 @@ error: return ret; } -static int _sym_cmp(const void* v0, const void* v1) -{ - const scf_elf_sym_t* sym0 = *(const scf_elf_sym_t**)v0; - const scf_elf_sym_t* sym1 = *(const scf_elf_sym_t**)v1; - - if (STB_LOCAL == ELF64_ST_BIND(sym0->st_info)) { - if (STB_GLOBAL == ELF64_ST_BIND(sym1->st_info)) - return -1; - } else if (STB_LOCAL == ELF64_ST_BIND(sym1->st_info)) - return 1; - return 0; -} - static int _add_debug_file_names(scf_parse_t* parse) { scf_block_t* root = parse->ast->root_block; @@ -2030,7 +1910,7 @@ static int _add_debug_file_names(scf_parse_t* parse) if (SCF_OP_BLOCK != b->node.type) continue; - ret = _scf_parse_add_sym(parse, b->name->data, 0, 0, SHN_ABS, ELF64_ST_INFO(STB_LOCAL, STT_FILE)); + ret = scf_symtab_add_sym(parse->symtab, b->name->data, 0, 0, SHN_ABS, ELF64_ST_INFO(STB_LOCAL, STT_FILE)); if (ret < 0) { scf_loge("\n"); return ret; @@ -2173,7 +2053,7 @@ int scf_parse_write_elf(scf_parse_t* parse, scf_vector_t* functions, scf_vector_ if (ret < 0) goto error; - 
qsort(parse->symtab->data, parse->symtab->size, sizeof(void*), _sym_cmp); + qsort(parse->symtab->data, parse->symtab->size, sizeof(void*), __symtab_sort_cmp); ret = _scf_parse_add_data_relas(parse, elf); if (ret < 0) @@ -2241,7 +2121,7 @@ int64_t scf_parse_fill_code2(scf_parse_t* parse, scf_vector_t* functions, scf_ve if (ret < 0) return ret; - ret = _scf_parse_add_sym(parse, f->signature->data, f->code_bytes, offset, SCF_SHNDX_TEXT, ELF64_ST_INFO(STB_GLOBAL, STT_FUNC)); + ret = scf_symtab_add_sym(parse->symtab, f->signature->data, f->code_bytes, offset, SCF_SHNDX_TEXT, ELF64_ST_INFO(STB_GLOBAL, STT_FUNC)); if (ret < 0) return ret; @@ -2289,9 +2169,9 @@ int scf_parse_fill_code(scf_parse_t* parse, scf_vector_t* functions, scf_vector_ scf_string_t* file_name = parse->debug->file_names->data[0]; const char* path = file_name->data; - ADD_SECTION_SYMBOL(SCF_SHNDX_TEXT, ".text"); - ADD_SECTION_SYMBOL(SCF_SHNDX_RODATA, ".rodata"); - ADD_SECTION_SYMBOL(SCF_SHNDX_DATA, ".data"); + ADD_SECTION_SYMBOL(parse->symtab, SCF_SHNDX_TEXT, ".text"); + ADD_SECTION_SYMBOL(parse->symtab, SCF_SHNDX_RODATA, ".rodata"); + ADD_SECTION_SYMBOL(parse->symtab, SCF_SHNDX_DATA, ".data"); scf_dwarf_info_entry_t* cu = NULL; scf_dwarf_line_result_t* r = NULL; diff --git a/parse/scf_symtab.c b/parse/scf_symtab.c new file mode 100644 index 0000000..76f9a93 --- /dev/null +++ b/parse/scf_symtab.c @@ -0,0 +1,100 @@ +#include"scf_symtab.h" + +static int _find_sym(const void* v0, const void* v1) +{ + const char* name = v0; + const scf_elf_sym_t* sym = v1; + + if (!sym->name) + return -1; + + return strcmp(name, sym->name); +} + +int scf_symtab_add_sym(scf_vector_t* symtab, const char* name, + uint64_t st_size, + Elf64_Addr st_value, + uint16_t st_shndx, + uint8_t st_info) +{ + scf_elf_sym_t* sym = NULL; + + if (name) + sym = scf_vector_find_cmp(symtab, name, _find_sym); + + if (!sym) { + sym = calloc(1, sizeof(scf_elf_sym_t)); + if (!sym) + return -ENOMEM; + + if (name) { + sym->name = strdup(name); + if 
(!sym->name) { + free(sym); + return -ENOMEM; + } + } + + sym->st_size = st_size; + sym->st_value = st_value; + sym->st_shndx = st_shndx; + sym->st_info = st_info; + + int ret = scf_vector_add(symtab, sym); + if (ret < 0) { + if (sym->name) + free(sym->name); + free(sym); + scf_loge("\n"); + return ret; + } + } + + return 0; +} + +int scf_symtab_add_rela(scf_vector_t* relas, scf_vector_t* symtab, scf_rela_t* r, const char* name, uint16_t st_shndx) +{ + scf_elf_rela_t* rela; + scf_elf_sym_t* sym; + + int ret; + int i; + + for (i = 0; i < symtab->size; i++) { + sym = symtab->data[i]; + + if (!sym->name) + continue; + + if (!strcmp(name, sym->name)) + break; + } + + if (i == symtab->size) { + ret = scf_symtab_add_sym(symtab, name, 0, 0, st_shndx, ELF64_ST_INFO(STB_GLOBAL, STT_NOTYPE)); + if (ret < 0) { + scf_loge("\n"); + return ret; + } + } + + scf_logd("rela: %s, offset: %ld\n", name, r->text_offset); + + rela = calloc(1, sizeof(scf_elf_rela_t)); + if (!rela) + return -ENOMEM; + + rela->name = (char*)name; + rela->r_offset = r->text_offset; + rela->r_info = ELF64_R_INFO(i + 1, r->type); + rela->r_addend = r->addend; + + ret = scf_vector_add(relas, rela); + if (ret < 0) { + free(rela); + return ret; + } + + return 0; +} diff --git a/parse/scf_symtab.h b/parse/scf_symtab.h new file mode 100644 index 0000000..dcab871 --- /dev/null +++ b/parse/scf_symtab.h @@ -0,0 +1,37 @@ +#ifndef SCF_SYMTAB_H +#define SCF_SYMTAB_H + +#include"scf_elf.h" +#include"scf_instruction.h" + +int scf_symtab_add_sym(scf_vector_t* symtab, const char* name, + uint64_t st_size, + Elf64_Addr st_value, + uint16_t st_shndx, + uint8_t st_info); + +int scf_symtab_add_rela(scf_vector_t* relas, scf_vector_t* symtab, scf_rela_t* r, const char* name, uint16_t st_shndx); + +#define ADD_SECTION_SYMBOL(symtab, sh_index, sh_name) \ + do { \ + int ret = scf_symtab_add_sym(symtab, sh_name, 0, 0, sh_index, ELF64_ST_INFO(STB_LOCAL, STT_SECTION)); \ + if (ret < 0) { \ + scf_loge("\n"); \ + return ret; \ + } \ + } 
while (0) + +static int __symtab_sort_cmp(const void* v0, const void* v1) +{ + const scf_elf_sym_t* sym0 = *(const scf_elf_sym_t**)v0; + const scf_elf_sym_t* sym1 = *(const scf_elf_sym_t**)v1; + + if (STB_LOCAL == ELF64_ST_BIND(sym0->st_info)) { + if (STB_GLOBAL == ELF64_ST_BIND(sym1->st_info)) + return -1; + } else if (STB_LOCAL == ELF64_ST_BIND(sym1->st_info)) + return 1; + return 0; +} + +#endif diff --git a/util/scf_string.h b/util/scf_string.h index cbf8717..040111f 100644 --- a/util/scf_string.h +++ b/util/scf_string.h @@ -6,7 +6,7 @@ typedef struct { int capacity; size_t len; - char* data; + uint8_t* data; } scf_string_t; -- 2.25.1