From b84d14e7139492aa2a31aa40b6583259b1ffb343 Mon Sep 17 00:00:00 2001
From: "yu.dongliang" <18588496441@163.com>
Date: Fri, 23 Jan 2026 22:43:52 +0800
Subject: [PATCH] asm: support .align and .org for x64

---
Review notes:
 - .fill: the scf_string_alloc() result check was inverted (a successful
   allocation returned -ENOMEM, a failed one was passed on to
   scf_string_cat_cstr_len()); it now tests !inst->bin.
 - .align: the operand is a shift count applied to an int, so it is now
   limited to [0, 30] instead of evaluating 1 << N unchecked.
 - scf_asm_len() gained a header comment describing its contract.
 - NOTE(review): __asm_add_text() no longer assigns inst->offset itself and
   only the x64 path visibly calls scf_asm_len(). Confirm that
   _naja_set_offset_for_jmps() fills inst->offset, otherwise assert(n >= 0)
   can fire for NAJA.
 - NOTE(review): an .align/.org that is not followed by an instruction or
   .fill stays pending in dfa_asm_s and is never emitted; confirm intended.
 - NOTE(review): .align N is implemented as 2^N bytes; GNU as for x86 ELF
   treats the operand as a byte count (.p2align is the power-of-two form).
   Confirm which meaning is wanted.

 asm/1.s                  |   2 +
 asm/scf_asm.c            |  57 +++++++++++++++++--
 asm/scf_asm.h            |   4 ++
 asm/scf_dfa_x64.c        | 126 ++++++++++++++++++++++++---------------
 native/scf_instruction.h |   3 +
 parse/scf_dfa_util.h     |  14 +++++
 6 files changed, 152 insertions(+), 54 deletions(-)

diff --git a/asm/1.s b/asm/1.s
index 5f5edb8..3e98556 100644
--- a/asm/1.s
+++ b/asm/1.s
@@ -10,6 +10,8 @@ main:
 	call printf
 	pop %rbp
 	ret
+.align 3
+.org 509
 1:
 	call 0b
 	.asciz "hello world\n"
diff --git a/asm/scf_asm.c b/asm/scf_asm.c
index 46046bc..5fb2280 100644
--- a/asm/scf_asm.c
+++ b/asm/scf_asm.c
@@ -1,7 +1,7 @@
 #include"scf_asm.h"
 #include"scf_symtab.h"
 
-void _x64_set_offset_for_jmps(scf_vector_t* text);
+int _x64_set_offset_for_jmps(scf_vector_t* text);
 int _naja_set_offset_for_jmps(scf_vector_t* text);
 
 int scf_asm_open(scf_asm_t** pasm, const char* arch)
@@ -144,6 +144,44 @@ int scf_asm_file(scf_asm_t* _asm, const char* path)
 	return ret;
 }
 
+// Lay out 'instructions' in order: set each inst->offset after applying its
+// .align (power-of-two byte count) and .org (absolute offset), and return the
+// total length in bytes, or -1 if a .org is below the current offset.
+int scf_asm_len(scf_vector_t* instructions)
+{
+	scf_instruction_t* inst;
+	int offset = 0;
+	int i;
+
+	for (i = 0; i < instructions->size; i++) {
+		inst = instructions->data[i];
+
+		if (inst->align > 0) {
+			int n = offset & (inst->align - 1);
+			if (n > 0)
+				offset += inst->align - n;
+		}
+
+		if (inst->org > 0) {
+			if (offset > inst->org) {
+				scf_loge(".org %#x less than .text length %#x\n", inst->org, offset);
+				return -1;
+			}
+
+			offset = inst->org;
+		}
+
+		inst->offset = offset;
+
+		if (inst->len > 0)
+			offset += inst->len;
+		else if (inst->bin)
+			offset += inst->bin->len;
+	}
+
+	return offset;
+}
+
 static int __asm_add_text(scf_elf_context_t* elf, scf_asm_t* _asm)
 {
 	scf_instruction_t* inst;
@@ -154,16 +192,18 @@ static int __asm_add_text(scf_elf_context_t* elf, scf_asm_t* _asm)
 	switch (elf->ops->arch) {
 
 		case SCF_ELF_X64:
-			_x64_set_offset_for_jmps(_asm->text);
+			ret = _x64_set_offset_for_jmps(_asm->text);
 			break;
 
 		case SCF_ELF_NAJA:
-			_naja_set_offset_for_jmps(_asm->text);
+			ret = _naja_set_offset_for_jmps(_asm->text);
 			break;
 		default:
 			scf_loge("%s NOT support\n", elf->ops->machine);
 			break;
 	};
+	if (ret < 0)
+		return ret;
 
 	text = scf_string_alloc();
 	if (!text)
@@ -172,7 +212,16 @@ static int __asm_add_text(scf_elf_context_t* elf, scf_asm_t* _asm)
 	for (i = 0; i < _asm->text->size; i++) {
 		inst = _asm->text->data[i];
 
-		inst->offset = text->len;
+		int n = inst->offset - text->len;
+		assert(n >= 0);
+
+		if (n > 0) {
+			ret = scf_string_fill_zero(text, n);
+			if (ret < 0) {
+				scf_string_free(text);
+				return ret;
+			}
+		}
 
 		if (inst->len > 0)
 			ret = scf_string_cat_cstr_len(text, inst->code, inst->len);
diff --git a/asm/scf_asm.h b/asm/scf_asm.h
index 9cfeca1..cb15a94 100644
--- a/asm/scf_asm.h
+++ b/asm/scf_asm.h
@@ -48,6 +48,9 @@ struct dfa_asm_s
 
 	int type;
 
+	int align;
+	int org;
+
 	int n_comma;
 	int n_lp;
 	int n_rp;
@@ -61,6 +64,7 @@ int scf_asm_close (scf_asm_t* _asm);
 int scf_asm_file (scf_asm_t* _asm, const char* path);
 int scf_asm_to_obj(scf_asm_t* _asm, const char* obj, const char* arch);
 
+int scf_asm_len(scf_vector_t* instructions);
 
 static inline int __inst_data_is_reg(scf_inst_data_t* id)
 {
diff --git a/asm/scf_dfa_x64.c b/asm/scf_dfa_x64.c
index b60eeb9..007da3c 100644
--- a/asm/scf_dfa_x64.c
+++ b/asm/scf_dfa_x64.c
@@ -61,6 +61,7 @@ static int _x64_action_fill(scf_dfa_t* dfa, scf_vector_t* words, void* data)
 	scf_lex_word_t* w = words->data[words->size - 1];
 
 	d->fill = w;
+	d->i = 0;
 
 	return SCF_DFA_NEXT_WORD;
 }
@@ -720,46 +721,84 @@ static int _x64_action_LF(scf_dfa_t* dfa, scf_vector_t* words, void* data)
 			d ->label = NULL;
 		}
 
+		inst->align = d->align;
+		inst->org = d->org;
+
+		d->align = 0;
+		d->org = 0;
+
 		scf_instruction_print(inst);
 		d->opcode = NULL;
 
 	} else if (d->fill) {
-		if (d->i < 3) {
-			scf_loge(".fill needs 3 operands, file: %s, line: %d\n", d->fill->file->data, d->fill->line);
-			return SCF_DFA_ERROR;
-		}
+		if (SCF_LEX_WORD_ASM_ORG == d->fill->type) {
+			if (d->i < 1) {
+				scf_loge(".org needs 1 operand, file: %s, line: %d\n", d->fill->file->data, d->fill->line);
+				return SCF_DFA_ERROR;
+			}
 
-		int64_t n = d->operands[0].imm;
-		int64_t size = d->operands[1].imm;
-		uint64_t imm = d->operands[2].imm;
-		int64_t i;
+			d->org = d->operands[0].imm;
 
-		inst = calloc(1, sizeof(scf_instruction_t));
-		if (!inst)
-			return -ENOMEM;
+		} else if (SCF_LEX_WORD_ASM_ALIGN == d->fill->type) {
+			if (d->i < 1) {
+				scf_loge(".align needs 1 operand, file: %s, line: %d\n", d->fill->file->data, d->fill->line);
+				return SCF_DFA_ERROR;
+			}
 
-		if (n * size <= sizeof(inst->code)) {
-			for (i = 0; i < n; i++)
-				memcpy(inst->code + i * size, (uint8_t*)&imm, size);
+			// .align N requests 2^N bytes; keep N where 1 << N is defined for int
+			int64_t shift = d->operands[0].imm;
+			if (shift < 0 || shift > 30) {
+				scf_loge(".align operand out of range, file: %s, line: %d\n", d->fill->file->data, d->fill->line);
+				return SCF_DFA_ERROR;
+			}
+
+			d->align = 1 << shift;
 
-			inst->len = n * size;
-		} else {
-			inst->bin = scf_string_alloc();
-			if (inst->bin) {
-				scf_instruction_free(inst);
-				return -ENOMEM;
+		} else if (SCF_LEX_WORD_ASM_FILL == d->fill->type) {
+			if (d->i < 3) {
+				scf_loge(".fill needs 3 operands, file: %s, line: %d\n", d->fill->file->data, d->fill->line);
+				return SCF_DFA_ERROR;
 			}
 
-			for (i = 0; i < n; i++) {
-				int ret = scf_string_cat_cstr_len(inst->bin, (uint8_t*)&imm, size);
-				if (ret < 0) {
+			int64_t n = d->operands[0].imm;
+			int64_t size = d->operands[1].imm;
+			uint64_t imm = d->operands[2].imm;
+			int64_t i;
+
+			inst = calloc(1, sizeof(scf_instruction_t));
+			if (!inst)
+				return -ENOMEM;
+
+			if (n * size <= sizeof(inst->code)) {
+				for (i = 0; i < n; i++)
+					memcpy(inst->code + i * size, (uint8_t*)&imm, size);
+
+				inst->len = n * size;
+			} else {
+				inst->bin = scf_string_alloc();
+				if (!inst->bin) {
 					scf_instruction_free(inst);
 					return -ENOMEM;
 				}
+
+				for (i = 0; i < n; i++) {
+					int ret = scf_string_cat_cstr_len(inst->bin, (uint8_t*)&imm, size);
+					if (ret < 0) {
+						scf_instruction_free(inst);
+						return -ENOMEM;
+					}
+				}
 			}
+
+			X64_INST_ADD_CHECK(_asm->current, inst, NULL);
+
+			inst->align = d->align;
+			inst->org = d->org;
+
+			d->align = 0;
+			d->org = 0;
 		}
 
-		X64_INST_ADD_CHECK(_asm->current, inst, NULL);
 		d->fill = NULL;
 	}
 
@@ -866,6 +905,9 @@ int _x64_set_offset_for_jmps(scf_vector_t* text)
 	while (1) {
 		int drop_bytes = 0;
 
+		if (scf_asm_len(text) < 0)
+			return -1;
+
 		for (i = 0; i < text->size; i++) {
 			inst = text->data[i];
 
@@ -877,33 +919,10 @@ int _x64_set_offset_for_jmps(scf_vector_t* text)
 				continue;
 
 			int32_t bytes = 0;
-
 			switch (inst->flag) {
 				case 1:
-					for (j = i; j >= 0; j--) {
-						dst = text->data[j];
-
-						if (dst->len > 0)
-							bytes -= dst->len;
-						else if (dst->bin)
-							bytes -= dst->bin->len;
-
-						if (dst == inst->next)
-							break;
-					}
-					break;
 				case 2:
-					for (j = i + 1; j < text->size; j++) {
-						dst = text->data[j];
-
-						if (dst == inst->next)
-							break;
-
-						if (dst->len > 0)
-							bytes += dst->len;
-						else if (dst->bin)
-							bytes += dst->bin->len;
-					}
+					bytes = inst->next->offset - (inst->offset + inst->len);
 					break;
 				default:
 					break;
@@ -941,6 +960,8 @@ static int _dfa_init_module_x64(scf_dfa_t* dfa)
 	SCF_DFA_MODULE_NODE(dfa, x64, data, scf_asm_is_data, _x64_action_data);
 	SCF_DFA_MODULE_NODE(dfa, x64, global, scf_asm_is_global, _x64_action_global);
 	SCF_DFA_MODULE_NODE(dfa, x64, fill, scf_asm_is_fill, _x64_action_fill);
+	SCF_DFA_MODULE_NODE(dfa, x64, align, scf_asm_is_align, _x64_action_fill);
+	SCF_DFA_MODULE_NODE(dfa, x64, org, scf_asm_is_org, _x64_action_fill);
 	SCF_DFA_MODULE_NODE(dfa, x64, type, scf_asm_is_type, _x64_action_type);
 	SCF_DFA_MODULE_NODE(dfa, x64, str, scf_asm_is_str, _x64_action_str);
 
@@ -974,6 +995,8 @@ static int _dfa_init_syntax_x64(scf_dfa_t* dfa)
 	SCF_DFA_GET_MODULE_NODE(dfa, x64, data, data);
 	SCF_DFA_GET_MODULE_NODE(dfa, x64, global, global);
 	SCF_DFA_GET_MODULE_NODE(dfa, x64, fill, fill);
+	SCF_DFA_GET_MODULE_NODE(dfa, x64, align, align);
+	SCF_DFA_GET_MODULE_NODE(dfa, x64, org, org);
 
 	SCF_DFA_GET_MODULE_NODE(dfa, x64, identity, identity);
 	SCF_DFA_GET_MODULE_NODE(dfa, x64, colon, colon);
@@ -997,6 +1020,8 @@ static int _dfa_init_syntax_x64(scf_dfa_t* dfa)
 	scf_vector_add(dfa->syntaxes, data);
 	scf_vector_add(dfa->syntaxes, global);
 	scf_vector_add(dfa->syntaxes, fill);
+	scf_vector_add(dfa->syntaxes, align);
+	scf_vector_add(dfa->syntaxes, org);
 	scf_vector_add(dfa->syntaxes, opcode);
 	scf_vector_add(dfa->syntaxes, identity);
 
@@ -1068,8 +1093,9 @@ static int _dfa_init_syntax_x64(scf_dfa_t* dfa)
 	scf_dfa_node_add_child(rp, comma);
 	scf_dfa_node_add_child(rp, LF);
 
-	// .fill
-	scf_dfa_node_add_child(fill, number);
+	scf_dfa_node_add_child(fill, number); // .fill
+	scf_dfa_node_add_child(align, number); // .align
+	scf_dfa_node_add_child(org, number); // .org
 
 	return SCF_DFA_OK;
 }
diff --git a/native/scf_instruction.h b/native/scf_instruction.h
index 35c2391..f8cd59f 100644
--- a/native/scf_instruction.h
+++ b/native/scf_instruction.h
@@ -64,6 +64,9 @@ struct scf_instruction_s
 
 	scf_string_t* bin; // asm binary data, maybe in .text or .data
 	int offset; // asm offset, maybe in .text or .data
+	int align; // asm .align
+	int org; // asm .org
+
 	int len;
 	uint8_t code[32];
 
diff --git a/parse/scf_dfa_util.h b/parse/scf_dfa_util.h
index 5df5405..0415a25 100644
--- a/parse/scf_dfa_util.h
+++ b/parse/scf_dfa_util.h
@@ -354,6 +354,20 @@ static int scf_asm_is_fill(scf_dfa_t* dfa, void* word)
 	return SCF_LEX_WORD_ASM_FILL == w->type;
 }
 
+static int scf_asm_is_align(scf_dfa_t* dfa, void* word)
+{
+	scf_lex_word_t* w = word;
+
+	return SCF_LEX_WORD_ASM_ALIGN == w->type;
+}
+
+static int scf_asm_is_org(scf_dfa_t* dfa, void* word)
+{
+	scf_lex_word_t* w = word;
+
+	return SCF_LEX_WORD_ASM_ORG == w->type;
+}
+
 static int scf_asm_is_type(scf_dfa_t* dfa, void* word)
 {
 	scf_lex_word_t* w = word;
-- 
2.25.1