From e61a2792a16685a2784a7f7971ad7d5a28ec36af Mon Sep 17 00:00:00 2001 From: "yu.dongliang" <18588496441@163.com> Date: Wed, 26 Feb 2025 23:47:01 +0800 Subject: [PATCH] support to generate shared object .so (dynamic library) --- core/scf_lex_word.h | 5 +- core/scf_operator.c | 1 + elf/scf_dwarf.c | 12 + elf/scf_elf.c | 11 +- elf/scf_elf.h | 4 +- elf/scf_elf_link.c | 21 +- elf/scf_elf_link.h | 2 +- elf/scf_elf_native.h | 6 +- elf/scf_elf_x64.c | 412 ++++++++++++++++-------- elf/scf_elf_x64.h | 7 +- elf/scf_elf_x64_so.c | 578 ++++++++++++++++++++++++++-------- examples/init_struct_array.c | 12 +- examples/struct_pointer_opt.c | 1 + lex/scf_lex.c | 4 + lib/x64/scf_object.o | Bin 6973 -> 7017 bytes parse/main.c | 26 +- parse/scf_dfa_expr.c | 2 +- parse/scf_dfa_init_data.c | 41 ++- parse/scf_dfa_macro.c | 164 ++++++++++ 19 files changed, 1028 insertions(+), 281 deletions(-) diff --git a/core/scf_lex_word.h b/core/scf_lex_word.h index f6f460a..24e6a2e 100644 --- a/core/scf_lex_word.h +++ b/core/scf_lex_word.h @@ -108,8 +108,9 @@ enum scf_lex_words SCF_LEX_WORD_KEY_OPERATOR, // operator SCF_LEX_WORD_KEY_UNDERLINE, // _ underline - SCF_LEX_WORD_KEY_INCLUDE, // include - SCF_LEX_WORD_KEY_DEFINE, // define + SCF_LEX_WORD_KEY_INCLUDE, // #include + SCF_LEX_WORD_KEY_DEFINE, // #define + SCF_LEX_WORD_KEY_ENDIF, // #endif // data types SCF_LEX_WORD_KEY_CHAR, // char diff --git a/core/scf_operator.c b/core/scf_operator.c index ab12ac9..69351e8 100644 --- a/core/scf_operator.c +++ b/core/scf_operator.c @@ -8,6 +8,7 @@ static scf_operator_t base_operators[] = {"(", NULL, SCF_OP_CALL, 1, -1, SCF_OP_ASSOCIATIVITY_LEFT}, {"[", "i", SCF_OP_ARRAY_INDEX, 1, 2, SCF_OP_ASSOCIATIVITY_LEFT}, {"->", "p", SCF_OP_POINTER, 1, 2, SCF_OP_ASSOCIATIVITY_LEFT}, + {".", "p", SCF_OP_POINTER, 1, 2, SCF_OP_ASSOCIATIVITY_LEFT}, {"va_start", NULL, SCF_OP_VA_START, 1, 2, SCF_OP_ASSOCIATIVITY_LEFT}, {"va_arg", NULL, SCF_OP_VA_ARG, 1, 2, SCF_OP_ASSOCIATIVITY_LEFT}, diff --git a/elf/scf_dwarf.c 
b/elf/scf_dwarf.c index 9768f61..c7d8771 100644 --- a/elf/scf_dwarf.c +++ b/elf/scf_dwarf.c @@ -332,6 +332,18 @@ int scf_dwarf_debug_encode(scf_dwarf_t* debug) if (ret < 0) return ret; + size_t n = debug->debug_abbrev->len + + debug->debug_info->len + + debug->debug_line->len + + debug->str->len; + + n &= 0x7; + if (n) { + ret = scf_string_fill_zero(debug->str, 8 - n); + if (ret < 0) + return ret; + } + return 0; } diff --git a/elf/scf_elf.c b/elf/scf_elf.c index 238d2a2..76e5f29 100644 --- a/elf/scf_elf.c +++ b/elf/scf_elf.c @@ -228,9 +228,18 @@ int scf_elf_write_rel(scf_elf_context_t* elf) int scf_elf_write_exec(scf_elf_context_t* elf, const char* sysroot) { - if (elf && elf->ops && elf->ops->write_rel && sysroot) + if (elf && elf->ops && elf->ops->write_exec && sysroot) return elf->ops->write_exec(elf, sysroot); scf_loge("\n"); return -1; } + +int scf_elf_write_dyn(scf_elf_context_t* elf, const char* sysroot) +{ + if (elf && elf->ops && elf->ops->write_dyn && sysroot) + return elf->ops->write_dyn(elf, sysroot); + + scf_loge("\n"); + return -1; +} diff --git a/elf/scf_elf.h b/elf/scf_elf.h index 4face48..cd3a606 100644 --- a/elf/scf_elf.h +++ b/elf/scf_elf.h @@ -71,6 +71,7 @@ struct scf_elf_ops_s int (*add_dyn_rela)(scf_elf_context_t* elf, const scf_elf_rela_t* rela); int (*write_rel )(scf_elf_context_t* elf); + int (*write_dyn )(scf_elf_context_t* elf, const char* sysroot); int (*write_exec)(scf_elf_context_t* elf, const char* sysroot); }; @@ -105,7 +106,8 @@ int scf_elf_read_syms (scf_elf_context_t* elf, scf_vector_t* syms, const char* int scf_elf_read_relas(scf_elf_context_t* elf, scf_vector_t* relas, const char* sh_name); int scf_elf_read_phdrs(scf_elf_context_t* elf, scf_vector_t* phdrs); -int scf_elf_write_rel( scf_elf_context_t* elf); +int scf_elf_write_rel (scf_elf_context_t* elf); +int scf_elf_write_dyn (scf_elf_context_t* elf, const char* sysroot); int scf_elf_write_exec(scf_elf_context_t* elf, const char* sysroot); #endif diff --git 
a/elf/scf_elf_link.c b/elf/scf_elf_link.c index 095fc99..719f8d7 100644 --- a/elf/scf_elf_link.c +++ b/elf/scf_elf_link.c @@ -1036,7 +1036,7 @@ static int link_relas(scf_elf_file_t* exec, char* afiles[], int nb_afiles, char* return 0; } -int scf_elf_link(scf_vector_t* objs, scf_vector_t* afiles, scf_vector_t* sofiles, const char* sysroot, const char* arch, const char* out) +int scf_elf_link(scf_vector_t* objs, scf_vector_t* afiles, scf_vector_t* sofiles, const char* sysroot, const char* arch, const char* out, int dyn_flag) { scf_elf_file_t* exec = NULL; scf_elf_file_t* so = NULL; @@ -1100,7 +1100,19 @@ int scf_elf_link(scf_vector_t* objs, scf_vector_t* afiles, scf_vector_t* sofiles } } - size_t bytes = 0; + size_t bytes = exec->debug_abbrev->len + + exec->debug_info->len + + exec->debug_line->len + + exec->debug_str->len; + + bytes &= 0x7; + if (bytes) { + ret = scf_string_fill_zero(exec->debug_str, 8 - bytes); + if (ret < 0) + return ret; + } + + bytes = 0; #define ADD_SECTION(sname, flags, align, value) \ do { \ @@ -1171,7 +1183,10 @@ int scf_elf_link(scf_vector_t* objs, scf_vector_t* afiles, scf_vector_t* sofiles ADD_RELA_SECTION(debug_info, SCF_ELF_FILE_SHNDX(debug_info)); ADD_RELA_SECTION(debug_line, SCF_ELF_FILE_SHNDX(debug_line)); - ret = scf_elf_write_exec(exec->elf, sysroot); + if (dyn_flag) + ret = scf_elf_write_dyn(exec->elf, sysroot); + else + ret = scf_elf_write_exec(exec->elf, sysroot); if (ret < 0) return ret; diff --git a/elf/scf_elf_link.h b/elf/scf_elf_link.h index d976dcc..acd6fca 100644 --- a/elf/scf_elf_link.h +++ b/elf/scf_elf_link.h @@ -61,6 +61,6 @@ typedef struct { int scf_elf_file_close(scf_elf_file_t* ef, void (*rela_free)(void*), void (*sym_free)(void*)); -int scf_elf_link(scf_vector_t* objs, scf_vector_t* afiles, scf_vector_t* sofiles, const char* sysroot, const char* arch, const char* out); +int scf_elf_link(scf_vector_t* objs, scf_vector_t* afiles, scf_vector_t* sofiles, const char* sysroot, const char* arch, const char* out, int 
dyn_flag); #endif diff --git a/elf/scf_elf_native.h b/elf/scf_elf_native.h index d36abc1..cca7d3d 100644 --- a/elf/scf_elf_native.h +++ b/elf/scf_elf_native.h @@ -28,6 +28,9 @@ typedef struct { scf_string_t* name; + uint32_t hash; + uint32_t hash_n; + Elf64_Sym sym; int index; @@ -55,11 +58,10 @@ typedef struct { scf_vector_t* dyn_needs; scf_vector_t* dyn_relas; + elf_section_t* gnu_hash; elf_section_t* interp; elf_section_t* dynsym; elf_section_t* dynstr; - elf_section_t* gnu_version; - elf_section_t* gnu_version_r; elf_section_t* rela_plt; elf_section_t* plt; elf_section_t* dynamic; diff --git a/elf/scf_elf_x64.c b/elf/scf_elf_x64.c index 752645f..9b20ab0 100644 --- a/elf/scf_elf_x64.c +++ b/elf/scf_elf_x64.c @@ -254,68 +254,192 @@ static void _x64_elf_process_syms(elf_native_t* x64, uint32_t cs_index) } } -static int _x64_elf_write_exec(scf_elf_context_t* elf, const char* sysroot) +static void __x64_sym_set_addr(elf_native_t* x64, elf_section_t* s, uint64_t base) { - elf_native_t* x64 = elf->priv; - int nb_phdrs = 3; + elf_sym_t* sym; + int i; - if (x64->dynsyms && x64->dynsyms->size) { - __x64_elf_add_dyn(x64, sysroot); - nb_phdrs = 6; + for (i = 0; i < x64->symbols->size; i++) { + sym = x64->symbols->data[i]; + + uint32_t shndx = sym->sym.st_shndx; + + if (shndx == s->index) + sym->sym.st_value += base; + + scf_logd("sym: %s, %#lx, st_shndx: %d\n", sym->name->data, sym->sym.st_value, sym->sym.st_shndx); } +} - int nb_sections = 1 + x64->sections->size + 1 + 1 + 1; - uint64_t shstrtab_offset = 1; - uint64_t strtab_offset = 1; - uint64_t dynstr_offset = 1; - Elf64_Off phdr_offset = sizeof(x64->eh) + sizeof(Elf64_Shdr) * nb_sections; - Elf64_Off section_offset = phdr_offset + sizeof(Elf64_Phdr) * nb_phdrs; +static int __x64_sym_find_addr(elf_native_t* x64, const char* name, uint64_t* addr) +{ + elf_sym_t* sym; + int i; - elf_section_t* s; - elf_section_t* cs = NULL; - elf_section_t* ros = NULL; - elf_section_t* ds = NULL; - elf_section_t* crela = NULL; - 
elf_section_t* drela = NULL; - elf_sym_t* sym; + for (i = 0; i < x64->symbols->size; i++) { + sym = x64->symbols->data[i]; + + if (!strcmp(sym->name->data, name)) { + *addr = sym->sym.st_value; + return 0; + } + } + + return -1; +} +static int _x64_elf_process_sections(elf_native_t* x64, uint64_t* section_offset, + elf_section_t** cs, + elf_section_t** ros, elf_section_t** ds, + elf_section_t** crela, elf_section_t** drela) +{ + elf_section_t* s; int i; + for (i = 0; i < x64->sections->size; i++) { s = x64->sections->data[i]; if (!strcmp(".text", s->name->data)) { assert(s->data_len > 0); - assert(!cs); - cs = s; + assert(!*cs); + *cs = s; } else if (!strcmp(".rodata", s->name->data)) { assert(s->data_len >= 0); - assert(!ros); - ros = s; + assert(!*ros); + *ros = s; } else if (!strcmp(".data", s->name->data)) { assert(s->data_len >= 0); - assert(!ds); - ds = s; + assert(!*ds); + *ds = s; } else if (!strcmp(".rela.text", s->name->data)) { - assert(!crela); - crela = s; + assert(!*crela); + *crela = s; } else if (!strcmp(".rela.data", s->name->data)) { - assert(!drela); - drela = s; + assert(!*drela); + *drela = s; } - s->offset = section_offset; + s->offset = *section_offset; + *section_offset += s->data_len; + } + + return 0; +} + +static int _x64_elf_write_section_headers(scf_elf_context_t* elf, int nb_sections, uint64_t section_offset, uint64_t strtab_offset, uint64_t shstrtab_offset) +{ + elf_section_t* s; + elf_native_t* x64 = elf->priv; + elf_sym_t* sym; + int i; + + for (i = 0; i < x64->sections->size; i++) { + s = x64->sections->data[i]; + + if (SHT_RELA == s->sh.sh_type && 0 == s->sh.sh_link) + s->sh.sh_link = nb_sections - 3; + + section_header(&s->sh, + shstrtab_offset, s->sh.sh_addr, + section_offset, s->data_len, + s->sh.sh_link, s->sh.sh_info, s->sh.sh_entsize); + + if (SHT_STRTAB != s->sh.sh_type) + s->sh.sh_addralign = 8; + section_offset += s->data_len; + shstrtab_offset += s->name->len + 1; + + fwrite(&s->sh, sizeof(s->sh), 1, elf->fp); } - 
assert(crela); + + // set user's symbols' name + int nb_local_syms = 1; + + for (i = 0; i < x64->symbols->size; i++) { + sym = x64->symbols->data[i]; + + if (sym->name) { + sym->sym.st_name = strtab_offset; + strtab_offset += sym->name->len + 1; + } else + sym->sym.st_name = 0; + + if (STB_LOCAL == ELF64_ST_BIND(sym->sym.st_info)) + nb_local_syms++; + } + + // write symtab section header + section_header(&x64->sh_symtab, + shstrtab_offset, 0, + section_offset, (x64->symbols->size + 1) * sizeof(Elf64_Sym), + nb_sections - 2, + nb_local_syms, sizeof(Elf64_Sym)); + + fwrite(&x64->sh_symtab, sizeof(x64->sh_symtab), 1, elf->fp); + + section_offset += (x64->symbols->size + 1) * sizeof(Elf64_Sym); + shstrtab_offset += strlen(".symtab") + 1; + + // write strtab section header + section_header(&x64->sh_strtab, + shstrtab_offset, 0, + section_offset, strtab_offset, + 0, 0, 0); + fwrite(&x64->sh_strtab, sizeof(x64->sh_strtab), 1, elf->fp); + + section_offset += strtab_offset; + shstrtab_offset += strlen(".strtab") + 1; + + // write shstrtab section header + uint64_t shstrtab_len = shstrtab_offset + strlen(".shstrtab") + 1; + + section_header(&x64->sh_shstrtab, + shstrtab_offset, 0, + section_offset, shstrtab_len, + 0, 0, 0); + fwrite(&x64->sh_shstrtab, sizeof(x64->sh_shstrtab), 1, elf->fp); + return 0; +} + +static int _x64_elf_write_exec(scf_elf_context_t* elf, const char* sysroot) +{ + elf_native_t* x64 = elf->priv; + int nb_phdrs = 3; + + if (x64->dynsyms && x64->dynsyms->size > 0) { + __x64_elf_add_dyn(x64, sysroot); + nb_phdrs = 6; + } + + int nb_sections = 1 + x64->sections->size + 1 + 1 + 1; + uint64_t shstrtab_offset = 1; + uint64_t strtab_offset = 1; + uint64_t dynstr_offset = 1; + Elf64_Off phdr_offset = sizeof(x64->eh) + sizeof(Elf64_Shdr) * nb_sections; + Elf64_Off section_offset = phdr_offset + sizeof(Elf64_Phdr) * nb_phdrs; + + elf_section_t* s; + elf_section_t* cs = NULL; + elf_section_t* ros = NULL; + elf_section_t* ds = NULL; + elf_section_t* crela = NULL; + 
elf_section_t* drela = NULL; + elf_sym_t* sym; + + int ret; + int i; + + _x64_elf_process_sections(x64, &section_offset, &cs, &ros, &ds, &crela, &drela); uint64_t cs_align = (cs ->offset + cs ->data_len + 0x200000 - 1) >> 21 << 21; uint64_t ro_align = (ros->offset + ros->data_len + 0x200000 - 1) >> 21 << 21; @@ -327,29 +451,18 @@ static int _x64_elf_write_exec(scf_elf_context_t* elf, const char* sysroot) uint64_t cs_base = cs->offset + rx_base; uint64_t ro_base = ros->offset + r_base; uint64_t ds_base = ds->offset + rw_base; - uint64_t _start = 0; + uint64_t _start = 0; - for (i = 0; i < x64->symbols->size; i++) { - sym = x64->symbols->data[i]; + __x64_sym_set_addr(x64, cs, cs_base); + __x64_sym_set_addr(x64, ros, ro_base); + __x64_sym_set_addr(x64, ds, ds_base); - uint32_t shndx = sym->sym.st_shndx; - - if (shndx == cs->index) - sym->sym.st_value += cs_base; - - else if (shndx == ros->index) - sym->sym.st_value += ro_base; - - else if (shndx == ds->index) - sym->sym.st_value += ds_base; - - scf_logd("sym: %s, %#lx, st_shndx: %d\n", sym->name->data, sym->sym.st_value, sym->sym.st_shndx); + if (crela) { + ret = _x64_elf_link_cs(x64, cs, crela, cs_base); + if (ret < 0) + return ret; } - int ret = _x64_elf_link_cs(x64, cs, crela, cs_base); - if (ret < 0) - return ret; - if (drela) { ret = _x64_elf_link_ds(x64, ds, drela); if (ret < 0) @@ -371,19 +484,9 @@ static int _x64_elf_write_exec(scf_elf_context_t* elf, const char* sysroot) __x64_elf_post_dyn(x64, rx_base, rw_base, cs); } - for (i = 0; i < x64->symbols->size; i++) { - sym = x64->symbols->data[i]; - - if (!strcmp(sym->name->data, "_start")) { - - if (0 != _start) { - scf_loge("\n"); - return -EINVAL; - } - - _start = sym->sym.st_value; - break; - } + if (__x64_sym_find_addr(x64, "_start", &_start) < 0) { + scf_loge("symbol '_start' NOT found when linking\n"); + return -1; } // write elf header @@ -394,86 +497,147 @@ static int _x64_elf_write_exec(scf_elf_context_t* elf, const char* sysroot) fwrite(&x64->sh_null, 
sizeof(x64->sh_null), 1, elf->fp); // write user's section header - section_offset = phdr_offset + sizeof(Elf64_Phdr) * nb_phdrs; + section_offset = phdr_offset + sizeof(Elf64_Phdr) * nb_phdrs; - for (i = 0; i < x64->sections->size; i++) { - s = x64->sections->data[i]; + _x64_elf_write_section_headers(elf, nb_sections, section_offset, strtab_offset, shstrtab_offset); - if (SHT_RELA == s->sh.sh_type && 0 == s->sh.sh_link) - s->sh.sh_link = nb_sections - 3; +#if 1 + if (6 == nb_phdrs) { + __x64_elf_write_phdr(elf, rx_base, phdr_offset, nb_phdrs); - section_header(&s->sh, shstrtab_offset, s->sh.sh_addr, - section_offset, s->data_len, - s->sh.sh_link, s->sh.sh_info, s->sh.sh_entsize); + __x64_elf_write_interp(elf, rx_base, x64->interp->offset, x64->interp->data_len); + } - if (SHT_STRTAB != s->sh.sh_type) - s->sh.sh_addralign = 8; + __x64_elf_write_text (elf, rx_base, 0, cs->offset + cs->data_len); + __x64_elf_write_rodata(elf, r_base, ros->offset, ros->data_len); - section_offset += s->data_len; - shstrtab_offset += s->name->len + 1; + if (6 == nb_phdrs) { + __x64_elf_write_data (elf, rw_base, x64->dynamic->offset, x64->dynamic->data_len + x64->got_plt->data_len + ds->data_len); + __x64_elf_write_dynamic(elf, rw_base, x64->dynamic->offset, x64->dynamic->data_len); + } else { + __x64_elf_write_data(elf, rw_base, ds->offset, ds->data_len); + } +#endif - fwrite(&s->sh, sizeof(s->sh), 1, elf->fp); + elf_write_sections(elf); + elf_write_symtab (elf); + elf_write_strtab (elf); + elf_write_shstrtab(elf); + return 0; +} + +static int _x64_elf_write_dyn(scf_elf_context_t* elf, const char* sysroot) +{ + elf_native_t* x64 = elf->priv; + elf_sym_t* entry = NULL; + elf_sym_t* sym; + + int ret; + int i; + + if (!x64->dynsyms) { + x64 ->dynsyms = scf_vector_alloc(); + if (!x64->dynsyms) + return -ENOMEM; } - // set user's symbols' name - int nb_local_syms = 1; + for (i = 0; i < x64->symbols->size; i++) { + sym = x64->symbols->data[i]; - for (i = 0; i < x64->symbols->size; i++) { - 
sym = x64->symbols->data[i]; + if (ELF64_ST_INFO(STB_GLOBAL, STT_FUNC) != sym->sym.st_info) + continue; - if (sym->name) { - sym->sym.st_name = strtab_offset; - strtab_offset += sym->name->len + 1; - } else - sym->sym.st_name = 0; + if (!entry) + entry = sym; - if (STB_LOCAL == ELF64_ST_BIND(sym->sym.st_info)) - nb_local_syms++; + ret = scf_vector_add(x64->dynsyms, sym); + if (ret < 0) + return ret; } - // write symtab section header - section_header(&x64->sh_symtab, shstrtab_offset, 0, - section_offset, (x64->symbols->size + 1) * sizeof(Elf64_Sym), - nb_sections - 2, nb_local_syms, sizeof(Elf64_Sym)); + __x64_so_add_dyn(x64, sysroot); - fwrite(&x64->sh_symtab, sizeof(x64->sh_symtab), 1, elf->fp); + int nb_phdrs = 4; + int nb_sections = 1 + x64->sections->size + 1 + 1 + 1; + uint64_t shstrtab_offset = 1; + uint64_t strtab_offset = 1; + uint64_t dynstr_offset = 1; + Elf64_Off phdr_offset = sizeof(x64->eh) + sizeof(Elf64_Shdr) * nb_sections; + Elf64_Off section_offset = phdr_offset + sizeof(Elf64_Phdr) * nb_phdrs; - section_offset += (x64->symbols->size + 1) * sizeof(Elf64_Sym); - shstrtab_offset += strlen(".symtab") + 1; + elf_section_t* s; + elf_section_t* cs = NULL; + elf_section_t* ros = NULL; + elf_section_t* ds = NULL; + elf_section_t* crela = NULL; + elf_section_t* drela = NULL; - // write strtab section header - section_header(&x64->sh_strtab, shstrtab_offset, 0, - section_offset, strtab_offset, - 0, 0, 0); - fwrite(&x64->sh_strtab, sizeof(x64->sh_strtab), 1, elf->fp); - section_offset += strtab_offset; - shstrtab_offset += strlen(".strtab") + 1; + _x64_elf_process_sections(x64, &section_offset, &cs, &ros, &ds, &crela, &drela); - // write shstrtab section header - uint64_t shstrtab_len = shstrtab_offset + strlen(".shstrtab") + 1; - section_header(&x64->sh_shstrtab, shstrtab_offset, 0, - section_offset, shstrtab_len, 0, 0, 0); - fwrite(&x64->sh_shstrtab, sizeof(x64->sh_shstrtab), 1, elf->fp); + uint64_t cs_align = (cs ->offset + cs ->data_len + 0x200000 - 1) >> 21 
<< 21; + uint64_t ro_align = (ros->offset + ros->data_len + 0x200000 - 1) >> 21 << 21; -#if 1 - if (6 == nb_phdrs) { - __x64_elf_write_phdr(elf, rx_base, phdr_offset, nb_phdrs); + uint64_t rx_base = 0; + uint64_t r_base = cs_align; + uint64_t rw_base = cs_align + ro_align; - __x64_elf_write_interp(elf, rx_base, x64->interp->offset, x64->interp->data_len); + uint64_t cs_base = cs->offset + rx_base; + uint64_t ro_base = ros->offset + r_base; + uint64_t ds_base = ds->offset + rw_base; + uint64_t _start = 0; + + __x64_sym_set_addr(x64, cs, cs_base); + __x64_sym_set_addr(x64, ros, ro_base); + __x64_sym_set_addr(x64, ds, ds_base); + + if (crela) { + ret = _x64_elf_link_cs(x64, cs, crela, cs_base); + if (ret < 0) + return ret; } + if (drela) { + ret = _x64_elf_link_ds(x64, ds, drela); + if (ret < 0) + return ret; + } + + ret = _x64_elf_link_sections(x64, cs->index, ds->index); + if (ret < 0) + return ret; + + _x64_elf_process_syms(x64, cs->index); + + cs ->sh.sh_addr = cs_base; + ds ->sh.sh_addr = ds_base; + ros->sh.sh_addr = ro_base; + + __x64_elf_post_dyn(x64, rx_base, rw_base, cs); + + if (entry) + _start = entry->sym.st_value; + + // write elf header + elf_header(&x64->eh, ET_DYN, EM_X86_64, _start, phdr_offset, nb_phdrs, nb_sections, nb_sections - 1); + fwrite(&x64->eh, sizeof(x64->eh), 1, elf->fp); + + // write null section header + fwrite(&x64->sh_null, sizeof(x64->sh_null), 1, elf->fp); + + // write user's section header + section_offset = phdr_offset + sizeof(Elf64_Phdr) * nb_phdrs; + + _x64_elf_write_section_headers(elf, nb_sections, section_offset, strtab_offset, shstrtab_offset); + __x64_elf_write_text (elf, rx_base, 0, cs->offset + cs->data_len); __x64_elf_write_rodata(elf, r_base, ros->offset, ros->data_len); - if (6 == nb_phdrs) { - __x64_elf_write_data(elf, rw_base, x64->dynamic->offset, - x64->dynamic->data_len + x64->got_plt->data_len + ds->data_len); + if (x64->got_plt) + __x64_elf_write_data(elf, rw_base, x64->dynamic->offset, x64->dynamic->data_len + 
x64->got_plt->data_len + ds->data_len); + else + __x64_elf_write_data(elf, rw_base, x64->dynamic->offset, x64->dynamic->data_len + ds->data_len); - __x64_elf_write_dynamic(elf, rw_base, x64->dynamic->offset, x64->dynamic->data_len); - } else { - __x64_elf_write_data(elf, rw_base, ds->offset, ds->data_len); - } -#endif + __x64_elf_write_dynamic(elf, rw_base, x64->dynamic->offset, x64->dynamic->data_len); elf_write_sections(elf); elf_write_symtab (elf); @@ -484,12 +648,12 @@ static int _x64_elf_write_exec(scf_elf_context_t* elf, const char* sysroot) scf_elf_ops_t elf_ops_x64 = { - .machine = "x64", + .machine = "x64", - .open = elf_open, - .close = elf_close, + .open = elf_open, + .close = elf_close, - .add_sym = elf_add_sym, + .add_sym = elf_add_sym, .add_section = elf_add_section, .add_rela_section = elf_add_rela_section, @@ -501,7 +665,7 @@ scf_elf_ops_t elf_ops_x64 = .read_relas = elf_read_relas, .read_section = elf_read_section, - .write_rel = _x64_elf_write_rel, + .write_rel = _x64_elf_write_rel, + .write_dyn = _x64_elf_write_dyn, .write_exec = _x64_elf_write_exec, }; - diff --git a/elf/scf_elf_x64.h b/elf/scf_elf_x64.h index ee1aa19..f17efb1 100644 --- a/elf/scf_elf_x64.h +++ b/elf/scf_elf_x64.h @@ -4,8 +4,10 @@ #include"scf_elf.h" #include"scf_elf_native.h" -int __x64_elf_add_dyn (elf_native_t* x64, const char* sysroot); -int __x64_elf_post_dyn(elf_native_t* x64, uint64_t rx_base, uint64_t rw_base, elf_section_t* cs); +int __x64_elf_add_dyn (elf_native_t* x64, const char* sysroot); +void __x64_elf_post_dyn(elf_native_t* x64, uint64_t rx_base, uint64_t rw_base, elf_section_t* cs); + +int __x64_so_add_dyn(elf_native_t* x64, const char* sysroot); int __x64_elf_write_phdr (scf_elf_context_t* elf, uint64_t rx_base, uint64_t offset, uint32_t nb_phdrs); int __x64_elf_write_interp (scf_elf_context_t* elf, uint64_t rx_base, uint64_t offset, uint64_t len); @@ -15,4 +17,3 @@ int __x64_elf_write_data (scf_elf_context_t* elf, uint64_t rw_base, uint64_t o int 
__x64_elf_write_dynamic(scf_elf_context_t* elf, uint64_t rw_base, uint64_t offset, uint64_t len); #endif - diff --git a/elf/scf_elf_x64_so.c b/elf/scf_elf_x64_so.c index 7c613ca..366955b 100644 --- a/elf/scf_elf_x64_so.c +++ b/elf/scf_elf_x64_so.c @@ -17,6 +17,16 @@ static uint32_t _x64_elf_hash(const uint8_t* p) return k; } +uint32_t elf_new_hash(const char *s) +{ + uint32_t h = 5381; + + for (unsigned char c = *s; c != '\0'; c = *++s) + h = (h << 5) + h + c; + + return h; +} + static int _x64_elf_add_interp(elf_native_t* x64, elf_section_t** ps) { elf_section_t* s; @@ -33,8 +43,9 @@ static int _x64_elf_add_interp(elf_native_t* x64, elf_section_t** ps) char* interp = "/lib64/ld-linux-x86-64.so.2"; size_t len = strlen(interp); + size_t bytes = (len + 1 + 7) >> 3 << 3; - s->data = malloc(len + 1); + s->data = malloc(bytes); if (!s->data) { scf_string_free(s->name); free(s); @@ -42,7 +53,7 @@ static int _x64_elf_add_interp(elf_native_t* x64, elf_section_t** ps) } memcpy(s->data, interp, len); s->data[len] = '\0'; - s->data_len = len + 1; + s->data_len = bytes; s->index = 1; @@ -142,6 +153,58 @@ static int _x64_elf_add_gnu_version_r(elf_native_t* x64, elf_section_t** ps) return 0; } +static int _x64_elf_add_gnu_hash(elf_native_t* x64, elf_section_t** ps) +{ + elf_section_t* s; + + s = calloc(1, sizeof(elf_section_t)); + if (!s) + return -ENOMEM; + + s->name = scf_string_cstr(".gnu.hash"); + if (!s->name) { + free(s); + return -ENOMEM; + } + +#define HASH_BUCKETS 3 +#define HASH_BLOOMS 1 + + int n_syms = x64->dynsyms->size; + + if (x64->dyn_relas) + n_syms -= x64->dyn_relas->size; + + int len = sizeof(uint32_t) * 4 + sizeof(uint64_t) * HASH_BLOOMS + + sizeof(uint32_t) * HASH_BUCKETS + + sizeof(uint32_t) * n_syms; + + s->data = calloc(1, len); + if (!s->data) { + scf_string_free(s->name); + free(s); + return -ENOMEM; + } + s->data_len = len; + + s->index = 1; + + s->sh.sh_type = SHT_GNU_HASH; + s->sh.sh_flags = SHF_ALLOC; + s->sh.sh_addralign = 8; + + int ret = 
scf_vector_add(x64->sections, s); + if (ret < 0) { + scf_string_free(s->name); + free(s->data); + free(s); + return -ENOMEM; + } + + *ps = s; + return 0; +} + static int _x64_elf_add_dynsym(elf_native_t* x64, elf_section_t** ps) { elf_section_t* s; @@ -216,7 +279,7 @@ static int _x64_elf_add_dynstr(elf_native_t* x64, elf_section_t** ps) return 0; } -static int _x64_elf_add_dynamic(elf_native_t* x64, elf_section_t** ps) +static int _x64_elf_add_dynamic(elf_native_t* x64, elf_section_t** ps, int n_tags) { elf_section_t* s; @@ -230,17 +293,19 @@ static int _x64_elf_add_dynamic(elf_native_t* x64, elf_section_t** ps) return -ENOMEM; } - int nb_tags = x64->dyn_needs->size + 11 + 1; + n_tags += 4 + 1; // must have tags: STRTAB, SYMTAB, STRSZ, SYMENT, NULL + if (x64->dyn_needs) + n_tags += x64->dyn_needs->size; - s->data = calloc(nb_tags, sizeof(Elf64_Dyn)); + s->data = calloc(n_tags, sizeof(Elf64_Dyn)); if (!s->data) { scf_string_free(s->name); free(s); return -ENOMEM; } - s->data_len = nb_tags * sizeof(Elf64_Dyn); + s->data_len = n_tags * sizeof(Elf64_Dyn); - s->index = 1; + s->index = 1; s->sh.sh_type = SHT_PROGBITS; s->sh.sh_flags = SHF_ALLOC | SHF_WRITE; @@ -426,13 +491,11 @@ static int _section_cmp(const void* v0, const void* v1) return 0; } -int __x64_elf_add_dyn(elf_native_t* x64, const char* sysroot) +static void __x64_sym_set_section(elf_native_t* x64) { - elf_section_t* s; - elf_sym_t* sym; - Elf64_Rela* rela; - + elf_sym_t* sym; int i; + for (i = x64->symbols->size - 1; i >= 0; i--) { sym = x64->symbols->data[i]; @@ -448,27 +511,17 @@ int __x64_elf_add_dyn(elf_native_t* x64, const char* sysroot) sym->section = x64->sections->data[shndx - 1]; } } +} - char* sh_names[] = { - ".interp", - ".dynsym", - ".dynstr", -// ".gnu.version_r", - ".rela.plt", - ".plt", - - ".text", - ".rodata", - - ".dynamic", - ".got.plt", - ".data", - }; +static void __x64_section_update_index(elf_native_t* x64, int n) +{ + elf_section_t* s; + int i; for (i = 0; i < x64->sections->size; 
i++) { s = x64->sections->data[i]; - s->index = x64->sections->size + 1 + sizeof(sh_names) / sizeof(sh_names[0]); + s->index = x64->sections->size + 1 + n; scf_logd("s: %s, link: %d, info: %d\n", s->name->data, s->sh.sh_link, s->sh.sh_info); @@ -484,6 +537,125 @@ int __x64_elf_add_dyn(elf_native_t* x64, const char* sysroot) s->info = x64->sections->data[s->sh.sh_info - 1]; } } +} + +static void __x64_section_update_index2(elf_native_t* x64, char** sh_names, int n) +{ + elf_section_t* s; + int i; + int j; + + for (i = 0; i < x64->sections->size; i++) { + s = x64->sections->data[i]; + + for (j = 0; j < n; j++) { + if (!strcmp(s->name->data, sh_names[j])) + break; + } + + if (j < n) + s->index = j + 1; + + scf_logd("i: %d, s: %s, index: %d\n", i, s->name->data, s->index); + } + + qsort(x64->sections->data, x64->sections->size, sizeof(void*), _section_cmp); + + for (i = n; i < x64->sections->size; i++) { + s = x64->sections->data[i]; + + s->index = i + 1; + } + + for (i = 0; i < x64->sections->size; i++) { + s = x64->sections->data[i]; + + scf_logd("i: %d, s: %s, index: %d\n", i, s->name->data, s->index); + + if (s->link) { + scf_logd("link: %s, index: %d\n", s->link->name->data, s->link->index); + s->sh.sh_link = s->link->index; + } + + if (s->info) { + scf_logd("info: %s, index: %d\n", s->info->name->data, s->info->index); + s->sh.sh_info = s->info->index; + } + } +} + +static void __x64_sym_set_section2(elf_native_t* x64) +{ + elf_sym_t* sym; + int i; + + for (i = 0; i < x64->symbols->size; i++) { + sym = x64->symbols->data[i]; + + if (sym->section) { + scf_logd("sym: %s, index: %d->%d\n", sym->name->data, sym->sym.st_shndx, sym->section->index); + sym->sym.st_shndx = sym->section->index; + } + } +} + +static int __x64_elf_dyn_needs(elf_native_t* x64, const char* sysroot, scf_string_t* dynstr) +{ + scf_string_t* need; + Elf64_Dyn* dyns = (Elf64_Dyn*)x64->dynamic->data; + + int pre = strlen(sysroot); + int ret; + int i; + + if ('/' != sysroot[pre - 1]) + pre++; + + 
pre += strlen("x64/"); + + for (i = 0; i < x64->dyn_needs->size; i++) { + need = x64->dyn_needs->data[i]; + + dyns[i].d_tag = DT_NEEDED; + dyns[i].d_un.d_val = dynstr->len; + + scf_logi("i: %d, %s, %s\n", i, need->data, need->data + pre); + + if (!strncmp(need->data, sysroot, strlen(sysroot))) + ret = scf_string_cat_cstr_len(dynstr, need->data + pre, need->len - pre + 1); + else + ret = scf_string_cat_cstr_len(dynstr, need->data, need->len + 1); + + if (ret < 0) + return ret; + } + return 0; +} + +int __x64_elf_add_dyn(elf_native_t* x64, const char* sysroot) +{ + elf_section_t* s; + elf_sym_t* sym; + + static char* sh_names[] = { + ".interp", + ".dynsym", + ".dynstr", +// ".gnu.version_r", + ".rela.plt", + ".plt", + + ".text", + ".rodata", + + ".dynamic", + ".got.plt", + ".data", + }; + + __x64_sym_set_section(x64); + + __x64_section_update_index(x64, sizeof(sh_names) / sizeof(sh_names[0])); _x64_elf_add_interp(x64, &x64->interp); _x64_elf_add_dynsym(x64, &x64->dynsym); @@ -494,30 +666,21 @@ int __x64_elf_add_dyn(elf_native_t* x64, const char* sysroot) _x64_elf_add_rela_plt(x64, &x64->rela_plt); _x64_elf_add_plt(x64, &x64->plt); - _x64_elf_add_dynamic(x64, &x64->dynamic); + _x64_elf_add_dynamic(x64, &x64->dynamic, 4); _x64_elf_add_got_plt(x64, &x64->got_plt); scf_string_t* str = scf_string_alloc(); + int i; char c = '\0'; scf_string_cat_cstr_len(str, &c, 1); - Elf64_Sym* syms = (Elf64_Sym* )x64->dynsym->data; - Elf64_Sym sym0 = {0}; - - sym0.st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE); - memcpy(&syms[0], &sym0, sizeof(Elf64_Sym)); - for (i = 0; i < x64->dynsyms->size; i++) { - elf_sym_t* xsym = x64->dynsyms->data[i]; + sym = x64->dynsyms->data[i]; - memcpy(&syms[i + 1], &xsym->sym, sizeof(Elf64_Sym)); + sym->sym.st_name = str->len; - syms[i + 1].st_name = str->len; - - scf_logd("i: %d, st_value: %#lx\n", i, syms[i + 1].st_value); - - scf_string_cat_cstr_len(str, xsym->name->data, xsym->name->len + 1); + scf_string_cat_cstr_len(str, sym->name->data, 
sym->name->len + 1); } #if 0 @@ -541,30 +704,21 @@ int __x64_elf_add_dyn(elf_native_t* x64, const char* sysroot) scf_string_cat_cstr_len(str, "GLIBC_2.4", strlen("GLIBC_2.4") + 1); #endif - Elf64_Dyn* dyns = (Elf64_Dyn*)x64->dynamic->data; - - size_t prefix = strlen(sysroot); - - if ('/' != sysroot[prefix - 1]) - prefix++; + __x64_elf_dyn_needs(x64, sysroot, str); - prefix += strlen("x64/"); + if (str->len & 0x7) { + size_t n = 8 - (str->len & 0x7); - for (i = 0; i < x64->dyn_needs->size; i++) { - scf_string_t* needed = x64->dyn_needs->data[i]; + int ret = scf_string_fill_zero(str, n); + if (ret < 0) + return ret; + } - dyns[i].d_tag = DT_NEEDED; - dyns[i].d_un.d_val = str->len; + Elf64_Dyn* dyns = (Elf64_Dyn*)x64->dynamic->data; - scf_logi("i: %d, %s, %s\n", i, needed->data, needed->data + prefix); + i = x64->dyn_needs->size; - if (!strncmp(needed->data, sysroot, strlen(sysroot))) - scf_string_cat_cstr_len(str, needed->data + prefix, needed->len - prefix + 1); - else - scf_string_cat_cstr_len(str, needed->data, needed->len + 1); - } - - dyns[i].d_tag = DT_STRTAB; + dyns[i ].d_tag = DT_STRTAB; dyns[i + 1].d_tag = DT_SYMTAB; dyns[i + 2].d_tag = DT_STRSZ; dyns[i + 3].d_tag = DT_SYMENT; @@ -607,70 +761,257 @@ int __x64_elf_add_dyn(elf_native_t* x64, const char* sysroot) x64->gnu_version_r->info = x64->interp; #endif - for (i = 0; i < x64->sections->size; i++) { - s = x64->sections->data[i]; + __x64_section_update_index2(x64, sh_names, sizeof(sh_names) / sizeof(sh_names[0])); - int j; - for (j = 0; j < sizeof(sh_names) / sizeof(sh_names[0]); j++) { - if (!strcmp(s->name->data, sh_names[j])) - break; - } + __x64_sym_set_section2(x64); + return 0; +} - if (j < sizeof(sh_names) / sizeof(sh_names[0])) - s->index = j + 1; +static int _sym_hash_n_cmp(const void* v0, const void* v1) +{ + const elf_sym_t* s0 = *(const elf_sym_t**)v0; + const elf_sym_t* s1 = *(const elf_sym_t**)v1; - scf_logd("i: %d, s: %s, index: %d\n", i, s->name->data, s->index); - } + if (s0->hash_n < 
s1->hash_n) + return -1; + else if (s0->hash_n > s1->hash_n) + return 1; + return 0; +} - qsort(x64->sections->data, x64->sections->size, sizeof(void*), _section_cmp); +int __x64_so_add_dyn(elf_native_t* x64, const char* sysroot) +{ + elf_section_t* s; + elf_sym_t* sym; - int j = sizeof(sh_names) / sizeof(sh_names[0]); + static char* sh_names[] = { + ".gnu.hash", + ".dynsym", + ".dynstr", - for (i = j; i < x64->sections->size; i++) { - s = x64->sections->data[i]; + ".text", + ".rodata", - s->index = i + 1; + ".dynamic", + ".data", + }; + + static char* sh_names_plt[] = { + ".gnu.hash", + ".dynsym", + ".dynstr", + ".rela.plt", + ".plt", + + ".text", + ".rodata", + + ".dynamic", + ".got.plt", + ".data", + }; + + __x64_sym_set_section(x64); + + if (x64->dyn_needs) { + __x64_section_update_index(x64, sizeof(sh_names_plt) / sizeof(sh_names_plt[0])); + + _x64_elf_add_rela_plt(x64, &x64->rela_plt); + _x64_elf_add_plt (x64, &x64->plt); + _x64_elf_add_dynamic (x64, &x64->dynamic, 4); + _x64_elf_add_got_plt (x64, &x64->got_plt); + } else { + __x64_section_update_index(x64, sizeof(sh_names) / sizeof(sh_names[0])); + + _x64_elf_add_dynamic(x64, &x64->dynamic, 1); } - for (i = 0; i < x64->sections->size; i++) { - s = x64->sections->data[i]; + _x64_elf_add_gnu_hash(x64, &x64->gnu_hash); + _x64_elf_add_dynsym (x64, &x64->dynsym); + _x64_elf_add_dynstr (x64, &x64->dynstr); - scf_logd("i: %d, s: %s, index: %d\n", i, s->name->data, s->index); + scf_string_t* str = scf_string_alloc(); - if (s->link) { - scf_logd("link: %s, index: %d\n", s->link->name->data, s->link->index); - s->sh.sh_link = s->link->index; - } + char c = '\0'; + int j = 0; + int i; - if (s->info) { - scf_logd("info: %s, index: %d\n", s->info->name->data, s->info->index); - s->sh.sh_info = s->info->index; + if (x64->dyn_relas) + j = x64->dyn_relas->size; + + scf_string_cat_cstr_len(str, &c, 1); + + for (i = 0; i < x64->dynsyms->size; i++) { + sym = x64->dynsyms->data[i]; + + sym->sym.st_name = str->len; + + 
scf_string_cat_cstr_len(str, sym->name->data, sym->name->len + 1); + + if (i >= j) { + sym->hash = elf_new_hash(sym->name->data); + sym->hash_n = sym->hash % HASH_BUCKETS; } } -#if 1 - for (i = 0; i < x64->symbols->size; i++) { - sym = x64->symbols->data[i]; + qsort(x64->dynsyms->data + j, x64->dynsyms->size - j, sizeof(void*), _sym_hash_n_cmp); - if (sym->section) { - scf_logd("sym: %s, index: %d->%d\n", sym->name->data, sym->sym.st_shndx, sym->section->index); - sym->sym.st_shndx = sym->section->index; + uint32_t* hash_header = (uint32_t*)x64->gnu_hash->data; + uint64_t* hash_bloom = (uint64_t*)(hash_header + 4); + uint32_t* hash_buckets = (uint32_t*)(hash_bloom + HASH_BLOOMS); + uint32_t* hash_values = (uint32_t*)(hash_buckets + HASH_BUCKETS); + + hash_header[0] = HASH_BUCKETS; + hash_header[1] = j + 1; + hash_header[2] = HASH_BLOOMS; + hash_header[3] = 6; + + for (i = j; i < x64->dynsyms->size; i++) { + sym = x64->dynsyms->data[i]; + + uint32_t h1 = sym->hash & 0x3f; + uint32_t h2 = (sym->hash >> 6) & 0x3f; + uint32_t n = h1 & (HASH_BLOOMS - 1); + + hash_bloom[n] |= (1ULL << h1) | (1ULL << h2); + + if (hash_buckets[sym->hash_n]) { + hash_values [i - j - 1] &= ~0x1; + hash_values [i - j] = sym->hash | 0x1; + } else { + hash_buckets[sym->hash_n] = i + 1; + hash_values [i - j] = sym->hash | 0x1; } } -#endif + + if (x64->dyn_needs) { + __x64_elf_dyn_needs(x64, sysroot, str); + i = x64->dyn_needs->size; + } else + i = 0; + + if (str->len & 0x7) { + size_t n = 8 - (str->len & 0x7); + + int ret = scf_string_fill_zero(str, n); + if (ret < 0) + return ret; + } + + Elf64_Dyn* dyns = (Elf64_Dyn*)x64->dynamic->data; + + dyns[i ].d_tag = DT_GNU_HASH; + dyns[i + 1].d_tag = DT_STRTAB; + dyns[i + 2].d_tag = DT_SYMTAB; + dyns[i + 3].d_tag = DT_STRSZ; + dyns[i + 4].d_tag = DT_SYMENT; + + dyns[i ].d_un.d_ptr = (uintptr_t)x64->gnu_hash; + dyns[i + 1].d_un.d_ptr = (uintptr_t)x64->dynstr; + dyns[i + 2].d_un.d_ptr = (uintptr_t)x64->dynsym; + dyns[i + 3].d_un.d_val = str->len; + 
dyns[i + 4].d_un.d_val = sizeof(Elf64_Sym); + + if (x64->dyn_needs) { + dyns[i + 5].d_tag = DT_PLTGOT; + dyns[i + 6].d_tag = DT_PLTRELSZ; + dyns[i + 7].d_tag = DT_PLTREL; + dyns[i + 8].d_tag = DT_JMPREL; + + dyns[i + 5].d_un.d_ptr = (uintptr_t)x64->got_plt; + dyns[i + 6].d_un.d_ptr = sizeof(Elf64_Rela); + dyns[i + 7].d_un.d_ptr = DT_RELA; + dyns[i + 8].d_un.d_ptr = (uintptr_t)x64->rela_plt; + + dyns[i + 9].d_tag = DT_NULL; + dyns[i + 9].d_un.d_ptr = 0; + } else { + dyns[i + 5].d_tag = DT_NULL; + dyns[i + 5].d_un.d_ptr = 0; + } + + x64->dynstr->data = str->data; + x64->dynstr->data_len = str->len; + + str->data = NULL; + str->len = 0; + str->capacity = 0; + scf_string_free(str); + str = NULL; + + x64->gnu_hash->link = x64->dynsym; + x64->dynsym ->link = x64->dynstr; + + if (x64->rela_plt) { + x64->rela_plt->link = x64->dynsym; + x64->rela_plt->info = x64->got_plt; + + __x64_section_update_index2(x64, sh_names_plt, sizeof(sh_names_plt) / sizeof(sh_names_plt[0])); + } else + __x64_section_update_index2(x64, sh_names, sizeof(sh_names) / sizeof(sh_names[0])); + + __x64_sym_set_section2(x64); return 0; } -int __x64_elf_post_dyn(elf_native_t* x64, uint64_t rx_base, uint64_t rw_base, elf_section_t* cs) +static void __x64_dynamic_update(elf_native_t* x64, uint64_t rx_base, uint64_t rw_base) +{ + Elf64_Dyn* dtags = (Elf64_Dyn*)x64->dynamic->data; + int i; + + for (i = 0; i < x64->dynamic->data_len / sizeof(Elf64_Dyn); i++) { + + elf_section_t* s = (elf_section_t*)dtags[i].d_un.d_ptr; + + switch (dtags[i].d_tag) { + + case DT_GNU_HASH: + case DT_SYMTAB: + case DT_STRTAB: + case DT_JMPREL: + case DT_VERSYM: + dtags[i].d_un.d_ptr = s->offset + rx_base; + s->sh.sh_addr = s->offset + rx_base; + break; + + case DT_PLTGOT: + dtags[i].d_un.d_ptr = s->offset + rw_base; + s->sh.sh_addr = s->offset + rw_base; + break; + default: + break; + }; + } +} + +static void __x64_dynsym_update(elf_native_t* x64) { - uint64_t cs_base = rx_base + cs->offset; + Elf64_Sym* syms = 
(Elf64_Sym*)x64->dynsym->data; + Elf64_Sym sym0 = {0}; + elf_sym_t* sym; + int i; -// x64->gnu_version_r->sh.sh_addr = rx_base + x64->gnu_version_r->offset; + sym0.st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE); + + memcpy(&syms[0], &sym0, sizeof(Elf64_Sym)); + + for (i = 0; i < x64->dynsyms->size; i++) { + sym = x64->dynsyms->data[i]; + + memcpy(&syms[i + 1], &sym->sym, sizeof(Elf64_Sym)); + } +} + +static void __x64_plt_link(elf_native_t* x64, uint64_t rx_base, uint64_t rw_base, elf_section_t* cs) +{ + uint64_t cs_base = rx_base + cs->offset; + + if (x64->interp) + x64->interp->sh.sh_addr = rx_base + x64->interp->offset; x64->rela_plt->sh.sh_addr = rx_base + x64->rela_plt->offset; x64->dynamic->sh.sh_addr = rw_base + x64->dynamic->offset; x64->got_plt->sh.sh_addr = rw_base + x64->got_plt->offset; - x64->interp->sh.sh_addr = rx_base + x64->interp->offset; x64->plt->sh.sh_addr = rx_base + x64->plt->offset; scf_logd("rw_base: %#lx, offset: %#lx\n", rw_base, x64->got_plt->offset); @@ -681,9 +1022,9 @@ int __x64_elf_post_dyn(elf_native_t* x64, uint64_t rx_base, uint64_t rw_base, el uint64_t* got_plt = (uint64_t* )x64->got_plt->data; uint8_t* plt = (uint8_t* )x64->plt->data; - uint64_t got_addr = x64->got_plt->sh.sh_addr + 8; - uint64_t plt_addr = x64->plt->sh.sh_addr; - int32_t offset = got_addr - plt_addr - 6; + uint64_t got_addr = x64->got_plt->sh.sh_addr + 8; + uint64_t plt_addr = x64->plt->sh.sh_addr; + int32_t offset = got_addr - plt_addr - 6; got_plt[0] = x64->dynamic->sh.sh_addr; got_plt[1] = 0; @@ -741,34 +1082,20 @@ int __x64_elf_post_dyn(elf_native_t* x64, uint64_t rx_base, uint64_t rw_base, el memcpy(cs->data + r->r_offset, &offset, sizeof(offset)); } +} - Elf64_Dyn* dtags = (Elf64_Dyn*)x64->dynamic->data; - - for (i = x64->dyn_needs->size; i < x64->dynamic->data_len / sizeof(Elf64_Dyn); i++) { - - elf_section_t* s = (elf_section_t*)dtags[i].d_un.d_ptr; - - switch (dtags[i].d_tag) { +void __x64_elf_post_dyn(elf_native_t* x64, uint64_t rx_base, uint64_t 
rw_base, elf_section_t* cs) +{ + x64->dynamic->sh.sh_addr = rw_base + x64->dynamic->offset; - case DT_SYMTAB: - case DT_STRTAB: - case DT_JMPREL: - case DT_VERNEED: - case DT_VERSYM: - dtags[i].d_un.d_ptr = s->offset + rx_base; - s->sh.sh_addr = s->offset + rx_base; - break; + if (x64->gnu_hash) + x64->gnu_hash->sh.sh_addr = rx_base + x64->gnu_hash->offset; - case DT_PLTGOT: - dtags[i].d_un.d_ptr = s->offset + rw_base; - s->sh.sh_addr = s->offset + rw_base; - break; - default: - break; - }; - } + if (x64->plt) + __x64_plt_link(x64, rx_base, rw_base, cs); - return 0; + __x64_dynamic_update(x64, rx_base, rw_base); + __x64_dynsym_update (x64); } int __x64_elf_write_phdr(scf_elf_context_t* elf, uint64_t rx_base, uint64_t offset, uint32_t nb_phdrs) @@ -874,4 +1201,3 @@ int __x64_elf_write_dynamic(scf_elf_context_t* elf, uint64_t rw_base, uint64_t o fwrite(&ph_dynamic, sizeof(Elf64_Phdr), 1, elf->fp); return 0; } - diff --git a/examples/init_struct_array.c b/examples/init_struct_array.c index 5aaab2b..2a7b76b 100644 --- a/examples/init_struct_array.c +++ b/examples/init_struct_array.c @@ -12,18 +12,20 @@ S s = .type = 123, }; +#define A 1 + int a[4] = { - .0 = 1, - .1 = 2, - .2 = 3, - .3 = 4, + .0 = 1, + .A = 2, + [2] = 3, + .3 = 4, }; int main() { printf("s->name: %s\n", s->name); - printf("s->type: %d\n", s->type); + printf("s->type: %d\n", s.type); printf("a[0]: %d\n", a[0]); printf("a[1]: %d\n", a[1]); diff --git a/examples/struct_pointer_opt.c b/examples/struct_pointer_opt.c index da1fdbc..fae9a6c 100644 --- a/examples/struct_pointer_opt.c +++ b/examples/struct_pointer_opt.c @@ -12,6 +12,7 @@ int f() S s = {&a, &b}; int** pp = &s->p0; + s.p1 = NULL; **pp += 3; return a; diff --git a/lex/scf_lex.c b/lex/scf_lex.c index e6f43a3..04caba8 100644 --- a/lex/scf_lex.c +++ b/lex/scf_lex.c @@ -4,6 +4,7 @@ static scf_key_word_t key_words[] = { {"if", SCF_LEX_WORD_KEY_IF}, {"else", SCF_LEX_WORD_KEY_ELSE}, + {"endif", SCF_LEX_WORD_KEY_ENDIF}, {"for", SCF_LEX_WORD_KEY_FOR}, 
{"while", SCF_LEX_WORD_KEY_WHILE}, @@ -1422,6 +1423,9 @@ int scf_lex_pop_word(scf_lex_t* lex, scf_lex_word_t** pword) switch (w1->type) { case SCF_LEX_WORD_KEY_INCLUDE: + case SCF_LEX_WORD_KEY_IF: + case SCF_LEX_WORD_KEY_ELSE: + case SCF_LEX_WORD_KEY_ENDIF: scf_lex_push_word(lex, w1); *pword = w; diff --git a/lib/x64/scf_object.o b/lib/x64/scf_object.o index 0ef68f52f5cf28abde6654895f42f1e7c0afffa3..a32511e480cb98a79b80134e0e21e2655a503e06 100644 GIT binary patch delta 279 zcmdmM_R?&^0>%Rq7n(CROuT8&G=Y1fqu68vMh(UZlOq}J8UIh7$!O1cj*kHhVkSRi zH0RV3fbi~3R%9|~;t-r1#bn25F?lAFJ!8P+n?Ul#WJzXw#tV}pf#jXdGnwnzltVo_ zpL#UDnE=%3(OLV$qx-l=>wywikKTX}9-W~L9-Xc~CjViVne4#DBHP(p@&Et-&ej4T z>CtPo17zC^5aH4JCU$cG*9u0)1Dk(x*E2C@Os?e9WsKOooX>-ipGjC~mq-h99g~8P z*gPir$$$7CY`!IMiHT_<+hjgLG06>VATt;kn1EOsi2neM2Z6fDcZJ0nTQ`3d=4WQi K*vu+!!2$rkZeR-l delta 254 zcmaE9w%2UJ0>&8=7n(ChOuT8&l)*jGQEajSqXuKf67;O(3~vvLv%TEqY6r-q7a(GD1=k8j#u=LxdFq)M9VRd2(`5|Xe4fvP zk+-Qy#D}?#Nk&M_hDmg?n!tn29D)~^B&*m!+87v^fLI2I|NMu5p2;7D#U;~`WF?Vg S^ENAr@G~= argc) { @@ -238,11 +246,13 @@ int main(int argc, char* argv[]) #define MAIN_ADD_FILES(_objs, _sofiles, _arch) \ do { \ - int ret = add_sys_files(objs, sysroot, _arch, _objs, sizeof(_objs) / sizeof(_objs[0])); \ - if (ret < 0) \ - return ret; \ + if (!dyn) { \ + int ret = add_sys_files(objs, sysroot, _arch, _objs, sizeof(_objs) / sizeof(_objs[0])); \ + if (ret < 0) \ + return ret; \ + } \ \ - ret = add_sys_files(sofiles, sysroot, _arch, _sofiles, sizeof(_sofiles) / sizeof(_sofiles[0])); \ + int ret = add_sys_files(sofiles, sysroot, _arch, _sofiles, sizeof(_sofiles) / sizeof(_sofiles[0])); \ if (ret < 0) \ return ret; \ } while (0) @@ -262,7 +272,7 @@ int main(int argc, char* argv[]) return -1; } - if (scf_elf_link(objs, afiles, sofiles, sysroot, arch, exec) < 0) { + if (scf_elf_link(objs, afiles, sofiles, sysroot, arch, exec, dyn) < 0) { scf_loge("\n"); return -1; } diff --git a/parse/scf_dfa_expr.c b/parse/scf_dfa_expr.c index 59697c6..39575f0 100644 --- 
a/parse/scf_dfa_expr.c +++ b/parse/scf_dfa_expr.c @@ -339,7 +339,7 @@ static int _expr_action_binary_op(scf_dfa_t* dfa, scf_vector_t* words, void* dat return SCF_DFA_ERROR; } - if (SCF_LEX_WORD_ARROW == w->type) { + if (SCF_LEX_WORD_ARROW == w->type || SCF_LEX_WORD_DOT == w->type) { assert(md->current_struct); if (!md->parent_block) diff --git a/parse/scf_dfa_init_data.c b/parse/scf_dfa_init_data.c index 29f8ca9..5669959 100644 --- a/parse/scf_dfa_init_data.c +++ b/parse/scf_dfa_init_data.c @@ -210,12 +210,31 @@ static int _data_action_member(scf_dfa_t* dfa, scf_vector_t* words, void* data) dfa_data_t* d = data; scf_lex_word_t* w = words->data[words->size - 1]; init_module_data_t* md = d->module_datas[dfa_module_init_data.index]; + scf_variable_t* v; + scf_type_t* t; if (md->current_dim >= md->current_n) { scf_loge("init data not right, file: %s, line: %d\n", w->file->data, w->line); return SCF_DFA_ERROR; } + assert(d->current_var); + + t = NULL; + scf_ast_find_type_type(&t, parse->ast, d->current_var->type); + if (!t->scope) { + scf_loge("base type '%s' has no member var '%s', file: %s, line: %d\n", + t->name->data, w->text->data, w->file->data, w->line); + return SCF_DFA_ERROR; + } + + v = scf_scope_find_variable(t->scope, w->text->data); + if (!v) { + scf_loge("member var '%s' NOT found in struct '%s', file: %s, line: %d\n", + w->text->data, t->name->data, w->file->data, w->line); + return SCF_DFA_ERROR; + } + md->current_index[md->current_dim].w = w; return SCF_DFA_NEXT_WORD; @@ -285,9 +304,13 @@ static int _dfa_init_module_init_data(scf_dfa_t* dfa) SCF_DFA_MODULE_NODE(dfa, init_data, lb, scf_dfa_is_lb, _data_action_lb); SCF_DFA_MODULE_NODE(dfa, init_data, rb, scf_dfa_is_rb, _data_action_rb); + SCF_DFA_MODULE_NODE(dfa, init_data, ls, scf_dfa_is_ls, scf_dfa_action_next); + SCF_DFA_MODULE_NODE(dfa, init_data, rs, scf_dfa_is_rs, scf_dfa_action_next); + SCF_DFA_MODULE_NODE(dfa, init_data, dot, scf_dfa_is_dot, scf_dfa_action_next); SCF_DFA_MODULE_NODE(dfa, 
init_data, member, scf_dfa_is_identity, _data_action_member); - SCF_DFA_MODULE_NODE(dfa, init_data, index, scf_dfa_is_const_integer, _data_action_index); + SCF_DFA_MODULE_NODE(dfa, init_data, index0, scf_dfa_is_const_integer, _data_action_index); + SCF_DFA_MODULE_NODE(dfa, init_data, index1, scf_dfa_is_const_integer, _data_action_index); SCF_DFA_MODULE_NODE(dfa, init_data, assign, scf_dfa_is_assign, scf_dfa_action_next); scf_parse_t* parse = dfa->priv; @@ -312,9 +335,13 @@ static int _dfa_init_syntax_init_data(scf_dfa_t* dfa) SCF_DFA_GET_MODULE_NODE(dfa, init_data, lb, lb); SCF_DFA_GET_MODULE_NODE(dfa, init_data, rb, rb); + SCF_DFA_GET_MODULE_NODE(dfa, init_data, ls, ls); + SCF_DFA_GET_MODULE_NODE(dfa, init_data, rs, rs); + SCF_DFA_GET_MODULE_NODE(dfa, init_data, dot, dot); SCF_DFA_GET_MODULE_NODE(dfa, init_data, member, member); - SCF_DFA_GET_MODULE_NODE(dfa, init_data, index, index); + SCF_DFA_GET_MODULE_NODE(dfa, init_data, index0, index0); + SCF_DFA_GET_MODULE_NODE(dfa, init_data, index1, index1); SCF_DFA_GET_MODULE_NODE(dfa, init_data, assign, assign); SCF_DFA_GET_MODULE_NODE(dfa, expr, entry, expr); @@ -331,7 +358,9 @@ static int _dfa_init_syntax_init_data(scf_dfa_t* dfa) // init expr for member of data scf_dfa_node_add_child(lb, dot); + scf_dfa_node_add_child(lb, ls); scf_dfa_node_add_child(comma, dot); + scf_dfa_node_add_child(comma, ls); scf_dfa_node_add_child(lb, expr); scf_dfa_node_add_child(expr, comma); @@ -339,11 +368,15 @@ static int _dfa_init_syntax_init_data(scf_dfa_t* dfa) scf_dfa_node_add_child(expr, rb); scf_dfa_node_add_child(dot, member); + scf_dfa_node_add_child(dot, index0); scf_dfa_node_add_child(member, assign); + scf_dfa_node_add_child(index0, assign); scf_dfa_node_add_child(assign, expr); - scf_dfa_node_add_child(dot, index); - scf_dfa_node_add_child(index, assign); + scf_dfa_node_add_child(ls, index1); + scf_dfa_node_add_child(index1, rs); + scf_dfa_node_add_child(rs, ls); + scf_dfa_node_add_child(rs, assign); return 0; } diff --git 
a/parse/scf_dfa_macro.c b/parse/scf_dfa_macro.c
index 68d4280..66ba0ea 100644
--- a/parse/scf_dfa_macro.c
+++ b/parse/scf_dfa_macro.c
@@ -4,9 +4,193 @@
 
 extern scf_dfa_module_t dfa_module_macro;
 
+// Handle '#if <const-integer>' ... '#else' ... '#endif'.
+//
+// The condition word is popped from the lexer and must be a constant integer;
+// the rest of the '#if' line is popped and freed. Words are then consumed up to
+// the '#endif' that closes this '#if' (nested '#if's are counted in n_if).
+// Words read while 'flag' is non-zero are kept and pushed back to the lexer,
+// words read while it is zero are freed.
+//
+// dfa:   DFA whose ops->pop_word() / ops->push_word() are used
+// words: words matched so far; the last one only supplies file / line for errors
+// data:  unused
+//
+// Returns SCF_DFA_OK on success; SCF_DFA_ERROR when pop_word() fails, the condition
+// is not a constant integer, EOF comes first, or '#else' / '#endif' lacks a newline.
+static inline int _macro_action_if(scf_dfa_t* dfa, scf_vector_t* words, void* data)
+{
+	scf_lex_word_t* w  = words->data[words->size - 1];
+	scf_lex_word_t* w1 = dfa->ops->pop_word(dfa);
+	scf_lex_word_t* w2;
+	scf_lex_word_t* w3;
+
+	if (!w1)
+		return SCF_DFA_ERROR;
+
+	if (!scf_lex_is_const_integer(w1)) {
+		scf_loge("the condition after '#if' must be a const integer, file: %s, line: %d\n", w->file->data, w->line);
+		// w1 was popped and is not linked anywhere else: free it like the other error paths do
+		scf_lex_word_free(w1);
+		return SCF_DFA_ERROR;
+	}
+
+	int flag = w1->data.u32;
+
+	scf_lex_word_free(w1);
+	w1 = NULL;
+
+	// drop the remainder of the '#if' line, up to and including the newline
+	while (1) {
+		w1 = dfa->ops->pop_word(dfa);
+		if (!w1)
+			return SCF_DFA_ERROR;
+
+		int type = w1->type;
+
+		scf_lex_word_free(w1);
+		w1 = NULL;
+
+		if (SCF_LEX_WORD_EOF == type) {
+			scf_loge("'#endif' NOT found for '#if' in file: %s, line: %d\n", w->file->data, w->line);
+			return SCF_DFA_ERROR;
+		}
+
+		if (SCF_LEX_WORD_LF == type)
+			break;
+	}
+
+	// h: list of kept words linked through ->next, most recently read word first
+	scf_lex_word_t* h = NULL;
+
+	// n_if: number of '#if's currently open, this one included
+	int n_if = 1;
+
+	while (1) {
+		w1 = dfa->ops->pop_word(dfa);
+		if (!w1)
+			goto error;
+		w1->next = NULL;
+
+		if (SCF_LEX_WORD_EOF == w1->type) {
+			scf_loge("'#endif' NOT found for '#if' in file: %s, line: %d\n", w->file->data, w->line);
+			scf_lex_word_free(w1);
+			goto error;
+		}
+
+		if (SCF_LEX_WORD_HASH == w1->type) {
+			w2 = dfa->ops->pop_word(dfa);
+			if (!w2) {
+				scf_lex_word_free(w1);
+				goto error;
+			}
+			w2->next = NULL;
+
+			scf_logd("'#%s' file: %s, line: %d\n", w2->text->data, w2->file->data, w2->line);
+
+			if (SCF_LEX_WORD_EOF == w2->type) {
+				scf_loge("'#endif' NOT found for '#if' in file: %s, line: %d\n", w->file->data, w->line);
+				scf_lex_word_free(w2);
+				scf_lex_word_free(w1);
+				goto error;
+			}
+
+			if (n_if < 1) {
+				scf_loge("extra '#%s' without an '#if' in file: %s, line: %d\n", w2->text->data, w2->file->data, w2->line);
+				scf_lex_word_free(w2);
+				scf_lex_word_free(w1);
+				goto error;
+			}
+
+			if (SCF_LEX_WORD_KEY_ELSE == w2->type || SCF_LEX_WORD_KEY_ENDIF == w2->type) {
+				w3 = dfa->ops->pop_word(dfa);
+				if (!w3) {
+					scf_lex_word_free(w2);
+					scf_lex_word_free(w1);
+					goto error;
+				}
+				w3->next = NULL;
+
+				if (SCF_LEX_WORD_LF != w3->type) {
+					scf_loge("'\\n' NOT found after '#%s' in file: %s, line: %d\n", w2->text->data, w2->file->data, w2->line);
+					scf_lex_word_free(w3);
+					scf_lex_word_free(w2);
+					scf_lex_word_free(w1);
+					goto error;
+				}
+
+				if (SCF_LEX_WORD_KEY_ELSE == w2->type) {
+					if (1 == n_if) {
+						// '#else' of this '#if': invert which words are kept, drop the directive
+						flag = !flag;
+						scf_lex_word_free(w3);
+						scf_lex_word_free(w2);
+						scf_lex_word_free(w1);
+						continue;
+					}
+				} else {
+					if (0 == --n_if) {
+						// '#endif' closing this '#if': drop the directive and stop reading
+						scf_lex_word_free(w3);
+						scf_lex_word_free(w2);
+						scf_lex_word_free(w1);
+						break;
+					}
+				}
+
+				if (flag)
+					w2->next = w3;
+				else
+					scf_lex_word_free(w3);
+				w3 = NULL;
+
+			} else if (SCF_LEX_WORD_KEY_IF == w2->type) {
+				n_if++;
+			}
+
+			if (flag)
+				w1->next = w2;
+			else
+				scf_lex_word_free(w2);
+			w2 = NULL;
+		}
+
+		// move w1 and any directive words chained behind it to the front of h
+		if (flag) {
+			while (w1) {
+				w2 = w1->next;
+				w1->next = h;
+				h = w1;
+				w1 = w2;
+			}
+		} else
+			scf_lex_word_free(w1);
+		w1 = NULL;
+	}
+
+	// push kept words back, last-read word first (presumably so they pop in source order)
+	while (h) {
+		w = h;
+		scf_logd("'%s' file: %s, line: %d\n", w->text->data, w->file->data, w->line);
+		h = w->next;
+		dfa->ops->push_word(dfa, w);
+	}
+
+	return SCF_DFA_OK;
+
+error:
+	while (h) {
+		w = h;
+		h = w->next;
+		scf_lex_word_free(w);
+	}
+	return SCF_DFA_ERROR;
+}
+
 static int _dfa_init_module_macro(scf_dfa_t* dfa)
 {
 	SCF_DFA_MODULE_NODE(dfa, macro, hash, scf_dfa_is_hash, scf_dfa_action_next);
+	SCF_DFA_MODULE_NODE(dfa, macro, _if, scf_dfa_is_if, _macro_action_if);
 
 	return SCF_DFA_OK;
 }
@@ -14,8 +198,11 @@ static int _dfa_init_module_macro(scf_dfa_t* dfa)
 static int _dfa_init_syntax_macro(scf_dfa_t* dfa)
 {
 	SCF_DFA_GET_MODULE_NODE(dfa, macro, hash, hash);
+	SCF_DFA_GET_MODULE_NODE(dfa, macro, _if, _if);
 
 	scf_vector_add(dfa->syntaxes, hash);
+
+	scf_dfa_node_add_child(hash, _if);
 	return 0;
 }
 
-- 
2.25.1