asm: support .align and .org for x64
author yu.dongliang <18588496441@163.com>
Fri, 23 Jan 2026 14:43:52 +0000 (22:43 +0800)
committer yu.dongliang <18588496441@163.com>
Fri, 23 Jan 2026 14:43:52 +0000 (22:43 +0800)
asm/1.s
asm/scf_asm.c
asm/scf_asm.h
asm/scf_dfa_x64.c
native/scf_instruction.h
parse/scf_dfa_util.h

diff --git a/asm/1.s b/asm/1.s
index 5f5edb81b3be68e31cb36d02a83300e9d9ddcb80..3e985568dc5f02c6fd5ea539ba57a56b7dcf9393 100644 (file)
--- a/asm/1.s
+++ b/asm/1.s
@@ -10,6 +10,8 @@ main:
        call printf
        pop  %rbp
        ret
+.align 3
+.org 509
 1:
        call 0b
 .asciz "hello world\n"
diff --git a/asm/scf_asm.c b/asm/scf_asm.c
index 46046bc7d676ff13a3c4fcecaf680d2eb02fddce..5fb2280797bc22d89263b536eb60b6c91aa014ed 100644 (file)
--- a/asm/scf_asm.c
+++ b/asm/scf_asm.c
@@ -1,7 +1,7 @@
 #include"scf_asm.h"
 #include"scf_symtab.h"
 
-void _x64_set_offset_for_jmps(scf_vector_t* text);
+int  _x64_set_offset_for_jmps(scf_vector_t* text);
 int _naja_set_offset_for_jmps(scf_vector_t* text);
 
 int scf_asm_open(scf_asm_t** pasm, const char* arch)
@@ -144,6 +144,41 @@ int scf_asm_file(scf_asm_t* _asm, const char* path)
        return ret;
 }
 
+int scf_asm_len(scf_vector_t* instructions) // lay out instructions in order: store each one's byte offset in inst->offset; returns the end offset, or -1 on an unsatisfiable .org
+{
+       scf_instruction_t* inst;
+       int offset = 0; // running byte offset of the next instruction
+       int i;
+
+       for (i = 0; i < instructions->size; i++) {
+               inst      = instructions->data[i];
+
+               if (inst->align > 0) { // 0 means no alignment was requested for this instruction
+                       int n = offset & (inst->align - 1); // remainder via mask: only correct when align is a power of two
+                       if (n > 0)
+                               offset += inst->align - n; // round up to the next multiple of align
+               }
+
+               if (inst->org > 0) { // 0 means no .org pending, so ".org 0" is indistinguishable from "not set"
+                       if (offset > inst->org) { // .org may only move forward; alignment above was applied first
+                               scf_loge(".org %#x less than .text length %#x\n", inst->org, offset);
+                               return -1;
+                       }
+
+                       offset = inst->org; // jump forward; the gap is not counted as part of any instruction
+               }
+
+               inst->offset = offset; // final position of this instruction after align/org adjustment
+
+               if (inst->len > 0) // size comes from len when it is set ...
+                       offset += inst->len;
+               else if (inst->bin) // ... otherwise from the bin buffer, if any
+                       offset += inst->bin->len;
+       }
+
+       return offset; // total length including padding introduced by align/org
+}
+
 static int __asm_add_text(scf_elf_context_t* elf, scf_asm_t* _asm)
 {
        scf_instruction_t* inst;
@@ -154,16 +189,18 @@ static int __asm_add_text(scf_elf_context_t* elf, scf_asm_t* _asm)
        switch (elf->ops->arch)
        {
                case SCF_ELF_X64:
-                       _x64_set_offset_for_jmps(_asm->text);
+                       ret = _x64_set_offset_for_jmps(_asm->text);
                        break;
 
                case SCF_ELF_NAJA:
-                       _naja_set_offset_for_jmps(_asm->text);
+                       ret = _naja_set_offset_for_jmps(_asm->text);
                        break;
                default:
                        scf_loge("%s NOT support\n", elf->ops->machine);
                        break;
        };
+       if (ret < 0)
+               return ret;
 
        text = scf_string_alloc();
        if (!text)
@@ -172,7 +209,16 @@ static int __asm_add_text(scf_elf_context_t* elf, scf_asm_t* _asm)
        for (i = 0; i < _asm->text->size; i++) {
                inst      = _asm->text->data[i];
 
-               inst->offset = text->len;
+               int n = inst->offset - text->len;
+               assert(n >= 0);
+
+               if (n > 0) {
+                       ret = scf_string_fill_zero(text, n);
+                       if (ret < 0) {
+                               scf_string_free(text);
+                               return ret;
+                       }
+               }
 
                if (inst->len > 0)
                        ret = scf_string_cat_cstr_len(text, inst->code, inst->len);
diff --git a/asm/scf_asm.h b/asm/scf_asm.h
index 9cfeca17d9be3ee5650c87679e002d39131fafc0..cb15a940faefba95d654ab3863f98bd6d9c9994e 100644 (file)
--- a/asm/scf_asm.h
+++ b/asm/scf_asm.h
@@ -48,6 +48,9 @@ struct dfa_asm_s
 
        int                type;
 
+       int                align;
+       int                org;
+
        int                n_comma;
        int                n_lp;
        int                n_rp;
@@ -61,6 +64,7 @@ int scf_asm_close (scf_asm_t*  _asm);
 int scf_asm_file  (scf_asm_t*  _asm, const char* path);
 int scf_asm_to_obj(scf_asm_t*  _asm, const char* obj, const char* arch);
 
+int scf_asm_len(scf_vector_t* instructions);
 
 static inline int __inst_data_is_reg(scf_inst_data_t* id)
 {
diff --git a/asm/scf_dfa_x64.c b/asm/scf_dfa_x64.c
index b60eeb9613ba5ddba7a42d6ee2e5a3a56cbeea77..007da3c8d50a4df61cd8cb5f12f6a6a6fc0fb08f 100644 (file)
--- a/asm/scf_dfa_x64.c
+++ b/asm/scf_dfa_x64.c
@@ -61,6 +61,7 @@ static int _x64_action_fill(scf_dfa_t* dfa, scf_vector_t* words, void* data)
        scf_lex_word_t*  w   = words->data[words->size - 1];
 
        d->fill = w;
+       d->i    = 0;
 
        return SCF_DFA_NEXT_WORD;
 }
@@ -720,46 +721,77 @@ static int _x64_action_LF(scf_dfa_t* dfa, scf_vector_t* words, void* data)
                        d   ->label = NULL;
                }
 
+               inst->align = d->align;
+               inst->org   = d->org;
+
+               d->align = 0;
+               d->org   = 0;
+
                scf_instruction_print(inst);
                d->opcode = NULL;
 
        } else if (d->fill) {
-               if (d->i < 3) {
-                       scf_loge(".fill needs 3 operands, file: %s, line: %d\n", d->fill->file->data, d->fill->line);
-                       return SCF_DFA_ERROR;
-               }
+               if (SCF_LEX_WORD_ASM_ORG == d->fill->type) {
+                       if (d->i < 1) {
+                               scf_loge(".org needs 1 operand, file: %s, line: %d\n", d->fill->file->data, d->fill->line);
+                               return SCF_DFA_ERROR;
+                       }
 
-               int64_t  n    = d->operands[0].imm;
-               int64_t  size = d->operands[1].imm;
-               uint64_t imm  = d->operands[2].imm;
-               int64_t  i;
+                       d->org = d->operands[0].imm;
 
-               inst = calloc(1, sizeof(scf_instruction_t));
-               if (!inst)
-                       return -ENOMEM;
+               } else if (SCF_LEX_WORD_ASM_ALIGN == d->fill->type) {
+                       if (d->i < 1) {
+                               scf_loge(".align needs 1 operand, file: %s, line: %d\n", d->fill->file->data, d->fill->line);
+                               return SCF_DFA_ERROR;
+                       }
 
-               if (n * size <= sizeof(inst->code)) {
-                       for (i = 0; i < n; i++)
-                               memcpy(inst->code + i * size, (uint8_t*)&imm, size);
+                       d->align = 1 << d->operands[0].imm;
 
-                       inst->len = n * size;
-               } else {
-                       inst->bin = scf_string_alloc();
-                       if (inst->bin) {
-                               scf_instruction_free(inst);
-                               return -ENOMEM;
+               } else if (SCF_LEX_WORD_ASM_FILL == d->fill->type) {
+                       if (d->i < 3) {
+                               scf_loge(".fill needs 3 operands, file: %s, line: %d\n", d->fill->file->data, d->fill->line);
+                               return SCF_DFA_ERROR;
                        }
 
-                       for (i = 0; i < n; i++) {
-                               int ret = scf_string_cat_cstr_len(inst->bin, (uint8_t*)&imm, size);
-                               if (ret < 0) {
+                       int64_t  n    = d->operands[0].imm;
+                       int64_t  size = d->operands[1].imm;
+                       uint64_t imm  = d->operands[2].imm;
+                       int64_t  i;
+
+                       inst = calloc(1, sizeof(scf_instruction_t));
+                       if (!inst)
+                               return -ENOMEM;
+
+                       if (n * size <= sizeof(inst->code)) {
+                               for (i = 0; i < n; i++)
+                                       memcpy(inst->code + i * size, (uint8_t*)&imm, size);
+
+                               inst->len = n * size;
+                       } else {
+                               inst->bin = scf_string_alloc();
+                               if (inst->bin) {
                                        scf_instruction_free(inst);
                                        return -ENOMEM;
                                }
+
+                               for (i = 0; i < n; i++) {
+                                       int ret = scf_string_cat_cstr_len(inst->bin, (uint8_t*)&imm, size);
+                                       if (ret < 0) {
+                                               scf_instruction_free(inst);
+                                               return -ENOMEM;
+                                       }
+                               }
                        }
+
+                       X64_INST_ADD_CHECK(_asm->current, inst, NULL);
+
+                       inst->align = d->align;
+                       inst->org   = d->org;
+
+                       d->align = 0;
+                       d->org   = 0;
                }
 
-               X64_INST_ADD_CHECK(_asm->current, inst, NULL);
                d->fill = NULL;
        }
 
@@ -866,6 +898,9 @@ int _x64_set_offset_for_jmps(scf_vector_t* text)
        while (1) {
                int drop_bytes = 0;
 
+               if (scf_asm_len(text) < 0)
+                       return -1;
+
                for (i = 0; i < text->size; i++) {
                        inst      = text->data[i];
 
@@ -877,33 +912,10 @@ int _x64_set_offset_for_jmps(scf_vector_t* text)
                                continue;
 
                        int32_t bytes = 0;
-
                        switch (inst->flag) {
                                case 1:
-                                       for (j = i; j >= 0; j--) {
-                                               dst = text->data[j];
-
-                                               if (dst->len > 0)
-                                                       bytes -= dst->len;
-                                               else if (dst->bin)
-                                                       bytes -= dst->bin->len;
-
-                                               if (dst == inst->next)
-                                                       break;
-                                       }
-                                       break;
                                case 2:
-                                       for (j = i + 1; j < text->size; j++) {
-                                               dst           = text->data[j];
-
-                                               if (dst == inst->next)
-                                                       break;
-
-                                               if (dst->len > 0)
-                                                       bytes += dst->len;
-                                               else if (dst->bin)
-                                                       bytes += dst->bin->len;
-                                       }
+                                       bytes = inst->next->offset - (inst->offset + inst->len);
                                        break;
                                default:
                                        break;
@@ -941,6 +953,8 @@ static int _dfa_init_module_x64(scf_dfa_t* dfa)
        SCF_DFA_MODULE_NODE(dfa, x64, data,     scf_asm_is_data,      _x64_action_data);
        SCF_DFA_MODULE_NODE(dfa, x64, global,   scf_asm_is_global,    _x64_action_global);
        SCF_DFA_MODULE_NODE(dfa, x64, fill,     scf_asm_is_fill,      _x64_action_fill);
+       SCF_DFA_MODULE_NODE(dfa, x64, align,    scf_asm_is_align,     _x64_action_fill);
+       SCF_DFA_MODULE_NODE(dfa, x64, org,      scf_asm_is_org,       _x64_action_fill);
 
        SCF_DFA_MODULE_NODE(dfa, x64, type,     scf_asm_is_type,      _x64_action_type);
        SCF_DFA_MODULE_NODE(dfa, x64, str,      scf_asm_is_str,       _x64_action_str);
@@ -974,6 +988,8 @@ static int _dfa_init_syntax_x64(scf_dfa_t* dfa)
        SCF_DFA_GET_MODULE_NODE(dfa, x64, data,      data);
        SCF_DFA_GET_MODULE_NODE(dfa, x64, global,    global);
        SCF_DFA_GET_MODULE_NODE(dfa, x64, fill,      fill);
+       SCF_DFA_GET_MODULE_NODE(dfa, x64, align,     align);
+       SCF_DFA_GET_MODULE_NODE(dfa, x64, org,       org);
 
        SCF_DFA_GET_MODULE_NODE(dfa, x64, identity,  identity);
        SCF_DFA_GET_MODULE_NODE(dfa, x64, colon,     colon);
@@ -997,6 +1013,8 @@ static int _dfa_init_syntax_x64(scf_dfa_t* dfa)
        scf_vector_add(dfa->syntaxes, data);
        scf_vector_add(dfa->syntaxes, global);
        scf_vector_add(dfa->syntaxes, fill);
+       scf_vector_add(dfa->syntaxes, align);
+       scf_vector_add(dfa->syntaxes, org);
 
        scf_vector_add(dfa->syntaxes, opcode);
        scf_vector_add(dfa->syntaxes, identity);
@@ -1068,8 +1086,9 @@ static int _dfa_init_syntax_x64(scf_dfa_t* dfa)
        scf_dfa_node_add_child(rp,        comma);
        scf_dfa_node_add_child(rp,        LF);
 
-       // .fill
-       scf_dfa_node_add_child(fill,      number);
+       scf_dfa_node_add_child(fill,      number); // .fill
+       scf_dfa_node_add_child(align,     number); // .align
+       scf_dfa_node_add_child(org,       number); // .org
 
        return SCF_DFA_OK;
 }
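diff --git a/native/scf_instruction.h b/native/scf_instruction.h
index 35c2391090ccd24343bc39cd6f8853b653a7a1a8..f8cd59f8080d187a0afbd7e4266eb991a44121ef 100644 (file)
--- a/native/scf_instruction.h
+++ b/native/scf_instruction.h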
@@ -64,6 +64,9 @@ struct scf_instruction_s
        scf_string_t*       bin;    // asm binary data, maybe in .text or .data
        int                 offset; // asm offset,      maybe in .text or .data
 
+       int                 align; // asm .align
+       int                 org;   // asm .org
+
        int                 len;
        uint8_t             code[32];
 
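diff --git a/parse/scf_dfa_util.h b/parse/scf_dfa_util.h
index 5df5405bb9015a28610c0e2f99f4c6fe8e957ceb..0415a25d831555b68f06c40fca2695462cbbb9ef 100644 (file)
--- a/parse/scf_dfa_util.h
+++ b/parse/scf_dfa_util.h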
@@ -354,6 +354,20 @@ static int scf_asm_is_fill(scf_dfa_t* dfa, void* word)
        return SCF_LEX_WORD_ASM_FILL == w->type;
 }
 
+static int scf_asm_is_align(scf_dfa_t* dfa, void* word) // DFA node predicate for the ".align" node; dfa is unused
+{
+       scf_lex_word_t* w = word; // word is treated as a scf_lex_word_t*
+
+       return SCF_LEX_WORD_ASM_ALIGN == w->type; // 1 when the word's type is SCF_LEX_WORD_ASM_ALIGN, else 0
+}
+
+static int scf_asm_is_org(scf_dfa_t* dfa, void* word) // DFA node predicate for the ".org" node; dfa is unused
+{
+       scf_lex_word_t* w = word; // word is treated as a scf_lex_word_t*
+
+       return SCF_LEX_WORD_ASM_ORG == w->type; // 1 when the word's type is SCF_LEX_WORD_ASM_ORG, else 0
+}
+
 static int scf_asm_is_type(scf_dfa_t* dfa, void* word)
 {
        scf_lex_word_t* w = word;