x64: callee doesn't save unused registers; align the stack to 16 bytes because libc needs it
author yu.dongliang <18588496441@163.com>
Thu, 22 Jun 2023 11:07:45 +0000 (19:07 +0800)
committer yu.dongliang <18588496441@163.com>
Thu, 22 Jun 2023 11:07:45 +0000 (19:07 +0800)
core/scf_function.h
lib/scf_object.o
native/risc/scf_risc_reg_arm64.c
native/x64/scf_x64.c
native/x64/scf_x64_inst.c
native/x64/scf_x64_reg.c
native/x64/scf_x64_reg.h

index 7d6a654e85f820dbdba0402342627523368642c4..744d07e3e8a03dec61de04d83407e00a874e1ed5 100644 (file)
@@ -49,6 +49,7 @@ struct scf_function_s {
        scf_3ac_code_t*   init_code;
        int               init_code_bytes;
 
+       int               callee_saved_size;
        int               local_vars_size;
        int               code_bytes;
 
index 5172fccbdc80f9d35976b5dd367fbf0e2930e9c7..902ff7897c9b6a8cce015a9cd8a6bc5d6f73c275 100644 (file)
Binary files a/lib/scf_object.o and b/lib/scf_object.o differ
index 39526097ef07754a87b837547edda72409429254..2ed875859c5209feee077f2ead3126df25a958cf 100644 (file)
@@ -715,7 +715,7 @@ int arm64_overflow_reg(scf_register_t* r, scf_3ac_code_t* c, scf_function_t* f)
 int arm64_overflow_reg2(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f)
 {
        scf_register_t* r2;
-       scf_dag_node_t*     dn2;
+       scf_dag_node_t* dn2;
 
        int i;
        int j;
index 38a83a2171ea0abe056575ac72564bb42c328149..2007c5c5e319e8b703aca96dfb0e0772b4db48d6 100644 (file)
@@ -200,13 +200,37 @@ static int _x64_function_finish(scf_function_t* f)
        scf_x64_OpCode_t*   pop  = x64_find_OpCode(SCF_X64_POP,  8,8, SCF_X64_G);
        scf_x64_OpCode_t*   mov  = x64_find_OpCode(SCF_X64_MOV,  4,4, SCF_X64_G2E);
        scf_x64_OpCode_t*   sub  = x64_find_OpCode(SCF_X64_SUB,  4,8, SCF_X64_I2E);
+       scf_x64_OpCode_t*   ret  = x64_find_OpCode(SCF_X64_RET,  8,8, SCF_X64_G);
 
-       scf_register_t* rsp  = x64_find_register("rsp");
-       scf_register_t* rbp  = x64_find_register("rbp");
-       scf_register_t* r;
+       scf_register_t*     rsp  = x64_find_register("rsp");
+       scf_register_t*     rbp  = x64_find_register("rbp");
+       scf_register_t*     r;
        scf_instruction_t*  inst = NULL;
 
+       scf_basic_block_t*  bb;
+       scf_3ac_code_t*     end;
+       scf_list_t*         l;
+
+       l   = scf_list_tail(&f->basic_block_list_head);
+       bb  = scf_list_data(l, scf_basic_block_t, list);
+
+       l   = scf_list_tail(&bb->code_list_head);
+       end = scf_list_data(l, scf_3ac_code_t, list);
+
+       int err = x64_pop_callee_regs(end, f);
+       if (err < 0)
+               return err;
+
        if (f->bp_used_flag) {
+               inst = x64_make_inst_G2E(mov, rsp, rbp);
+               X64_INST_ADD_CHECK(end->instructions, inst);
+               end->inst_bytes += inst->len;
+               bb ->code_bytes += inst->len;
+
+               inst = x64_make_inst_G(pop, rbp);
+               X64_INST_ADD_CHECK(end->instructions, inst);
+               end->inst_bytes += inst->len;
+               bb ->code_bytes += inst->len;
 
                inst = x64_make_inst_G(push, rbp);
                X64_INST_ADD_CHECK(f->init_code->instructions, inst);
@@ -217,30 +241,36 @@ static int _x64_function_finish(scf_function_t* f)
                f->init_code_bytes += inst->len;
 
                uint32_t local = f->local_vars_size;
-               if (!(local & 0xf))
-                       local += 8;
+
+               if (f->callee_saved_size & 0xf) {
+                       if (!(local & 0xf))
+                               local += 8;
+               } else {
+                       if ((local & 0xf))
+                               local += 8;
+               }
+
+               scf_logw("### local: %#x, local_vars_size: %#x, callee_saved_size: %#x\n",
+                               local, f->local_vars_size, f->callee_saved_size);
 
                inst = x64_make_inst_I2E(sub, rsp, (uint8_t*)&local, 4);
-               //inst = x64_make_inst_I2E(sub, rsp, (uint8_t*)&f->local_vars_size, 4);
                X64_INST_ADD_CHECK(f->init_code->instructions, inst);
                f->init_code_bytes += inst->len;
 
-               int ret = _x64_save_rabi(f);
-               if (ret < 0)
-                       return ret;
+               int err = _x64_save_rabi(f);
+               if (err < 0)
+                       return err;
        } else
                f->init_code_bytes = 0;
 
-       int i;
-       for (i = 0; i < X64_ABI_CALLEE_SAVES_NB; i++) {
-
-               r  = x64_find_register_type_id_bytes(0, x64_abi_callee_saves[i], 8);
+       err = x64_push_callee_regs(f->init_code, f);
+       if (err < 0)
+               return err;
 
-               inst = x64_make_inst_G(push, r);
-               X64_INST_ADD_CHECK(f->init_code->instructions, inst);
-
-               f->init_code_bytes += inst->len;
-       }
+       inst = x64_make_inst(ret, 8);
+       X64_INST_ADD_CHECK(end->instructions, inst);
+       end->inst_bytes += inst->len;
+       bb ->code_bytes += inst->len;
 
        x64_registers_clear();
        return 0;
index 3cb5e451ae45b052b9c2c534e427d9c8913ea2c1..8e2f664ae5c871f97633ee596ab80a48a0b76fe1 100644 (file)
@@ -1582,32 +1582,6 @@ static int _x64_inst_end_handler(scf_native_t* ctx, scf_3ac_code_t* c)
                        return -ENOMEM;
        }
 
-       scf_register_t* rsp  = x64_find_register("rsp");
-       scf_register_t* rbp  = x64_find_register("rbp");
-       scf_register_t* r;
-
-       scf_x64_OpCode_t*   pop  = x64_find_OpCode(SCF_X64_POP, 8, 8, SCF_X64_G);
-       scf_x64_OpCode_t*   mov  = x64_find_OpCode(SCF_X64_MOV, 8, 8, SCF_X64_G2E);
-       scf_x64_OpCode_t*   ret  = x64_find_OpCode(SCF_X64_RET, 8, 8, SCF_X64_G);
-       scf_instruction_t*  inst = NULL;
-
-       int i;
-       for (i = X64_ABI_CALLEE_SAVES_NB - 1; i >= 0; i--) {
-
-               r  = x64_find_register_type_id_bytes(0, x64_abi_callee_saves[i], 8);
-
-               inst = x64_make_inst_G(pop, r);
-               X64_INST_ADD_CHECK(c->instructions, inst);
-       }
-
-       inst = x64_make_inst_G2E(mov, rsp, rbp);
-       X64_INST_ADD_CHECK(c->instructions, inst);
-
-       inst = x64_make_inst_G(pop, rbp);
-       X64_INST_ADD_CHECK(c->instructions, inst);
-
-       inst = x64_make_inst(ret, 8);
-       X64_INST_ADD_CHECK(c->instructions, inst);
        return 0;
 }
 
@@ -1854,11 +1828,13 @@ static int _x64_inst_push_rax_handler(scf_native_t* ctx, scf_3ac_code_t* c)
                        return -ENOMEM;
        }
 
-       scf_register_t* rax  = x64_find_register("rax");
-       scf_x64_OpCode_t*   push;
+       scf_register_t*     rax  = x64_find_register("rax");
+       scf_x64_OpCode_t*   push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G);
        scf_instruction_t*  inst;
 
-       push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G);
+       inst = x64_make_inst_G(push, rax);
+       X64_INST_ADD_CHECK(c->instructions, inst);
+
        inst = x64_make_inst_G(push, rax);
        X64_INST_ADD_CHECK(c->instructions, inst);
        return 0;
@@ -1872,11 +1848,13 @@ static int _x64_inst_pop_rax_handler(scf_native_t* ctx, scf_3ac_code_t* c)
                        return -ENOMEM;
        }
 
-       scf_register_t* rax  = x64_find_register("rax");
-       scf_x64_OpCode_t*   pop;
+       scf_register_t*     rax  = x64_find_register("rax");
+       scf_x64_OpCode_t*   pop  = x64_find_OpCode(SCF_X64_POP, 8,8, SCF_X64_G);
        scf_instruction_t*  inst;
 
-       pop  = x64_find_OpCode(SCF_X64_POP, 8,8, SCF_X64_G);
+       inst = x64_make_inst_G(pop, rax);
+       X64_INST_ADD_CHECK(c->instructions, inst);
+
        inst = x64_make_inst_G(pop, rax);
        X64_INST_ADD_CHECK(c->instructions, inst);
        return 0;
index 0339d3fb6ce0f4bbc30be9165f6750480e2d7e13..ad45485855ae4ddb084834a10d1499fe9947f74a 100644 (file)
@@ -149,6 +149,8 @@ int x64_registers_init()
                r->dag_nodes = scf_vector_alloc();
                if (!r->dag_nodes)
                        return -ENOMEM;
+
+               r->used = 0;
        }
 
        return 0;
@@ -168,6 +170,8 @@ void x64_registers_clear()
                        scf_vector_free(r->dag_nodes);
                        r->dag_nodes = NULL;
                }
+
+               r->used = 0;
        }
 }
 
@@ -175,12 +179,12 @@ int x64_caller_save_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs
 {
        int i;
        int j;
-       scf_register_t* r;
-       scf_register_t* r2;
-       scf_instruction_t*  inst;
-       scf_register_t* rsp  = x64_find_register("rsp");
+       scf_register_t*     r;
+       scf_register_t*     r2;
+       scf_register_t*     rsp  = x64_find_register("rsp");
        scf_x64_OpCode_t*   mov  = x64_find_OpCode(SCF_X64_MOV,  8,8, SCF_X64_G2E);
        scf_x64_OpCode_t*   push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G);
+       scf_instruction_t*  inst;
 
        int size = 0;
        int k    = 0;
@@ -574,13 +578,14 @@ int x64_overflow_reg(scf_register_t* r, scf_3ac_code_t* c, scf_function_t* f)
                }
        }
 
+       r->used = 1;
        return 0;
 }
 
 int x64_overflow_reg2(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f)
 {
        scf_register_t* r2;
-       scf_dag_node_t*     dn2;
+       scf_dag_node_t* dn2;
 
        int i;
        int j;
@@ -609,6 +614,7 @@ int x64_overflow_reg2(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c,
                }
        }
 
+       r->used = 1;
        return 0;
 }
 
@@ -669,6 +675,7 @@ static int _x64_overflow_reg3(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_cod
                }
        }
 
+       r->used = 1;
        return 0;
 }
 
@@ -849,7 +856,7 @@ scf_register_t* x64_select_overflowed_reg(scf_dag_node_t* dn, scf_3ac_code_t* c)
        return NULL;
 }
 
-int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f)
+int x64_load_const(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f)
 {
        scf_instruction_t*  inst;
        scf_x64_OpCode_t*   lea;
@@ -857,6 +864,7 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c,
        scf_variable_t*     v;
 
        v = dn->var;
+       r->used = 1;
 
        int size     = x64_variable_size(v);
        int is_float = scf_variable_float(v);
@@ -873,7 +881,7 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c,
                        scf_rela_t* rela = NULL;
 
                        lea  = x64_find_OpCode(SCF_X64_LEA,  size, size, SCF_X64_E2G);
-                       inst = x64_make_inst_M2G(&rela, lea, rabi, NULL, v);
+                       inst = x64_make_inst_M2G(&rela, lea, r, NULL, v);
                        X64_INST_ADD_CHECK(c->instructions, inst);
                        X64_RELA_ADD_CHECK(f->text_relas, rela, c, NULL, v->func_ptr);
 
@@ -881,7 +889,7 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c,
                        scf_x64_OpCode_t* xor;
 
                        xor  = x64_find_OpCode(SCF_X64_XOR, size, size, SCF_X64_G2E);
-                       inst = x64_make_inst_G2E(xor, rabi, rabi);
+                       inst = x64_make_inst_G2E(xor, r, r);
                        X64_INST_ADD_CHECK(c->instructions, inst);
                }
 
@@ -895,7 +903,7 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c,
 
                lea  = x64_find_OpCode(SCF_X64_LEA, size, size, SCF_X64_E2G);
 
-               inst = x64_make_inst_M2G(&rela, lea, rabi, NULL, v);
+               inst = x64_make_inst_M2G(&rela, lea, r, NULL, v);
                X64_INST_ADD_CHECK(c->instructions, inst);
                X64_RELA_ADD_CHECK(f->data_relas, rela, c, v, NULL);
 
@@ -906,13 +914,13 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c,
 
                lea = x64_find_OpCode(SCF_X64_LEA, size, size, SCF_X64_E2G);
 
-               inst = x64_make_inst_M2G(&rela, lea, rabi, NULL, v);
+               inst = x64_make_inst_M2G(&rela, lea, r, NULL, v);
                X64_INST_ADD_CHECK(c->instructions, inst);
                X64_RELA_ADD_CHECK(f->data_relas, rela, c, v, NULL);
 
        } else {
                mov  = x64_find_OpCode(SCF_X64_MOV, size, size, SCF_X64_I2G);
-               inst = x64_make_inst_I2G(mov, rabi, (uint8_t*)&v->data, size);
+               inst = x64_make_inst_I2G(mov, r, (uint8_t*)&v->data, size);
                X64_INST_ADD_CHECK(c->instructions, inst);
        }
 
@@ -931,6 +939,8 @@ int x64_load_reg(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_f
        int is_float = scf_variable_float(dn->var);
        int var_size = x64_variable_size(dn->var);
 
+       r->used = 1;
+
        if (!is_float) {
 
                if (scf_variable_const(dn->var)) {
@@ -1047,6 +1057,7 @@ int x64_select_reg(scf_register_t** preg, scf_dag_node_t* dn, scf_3ac_code_t* c,
        } else
                dn->loaded = 1;
 
+       r->used = 1;
        *preg = r;
        return 0;
 }
@@ -1345,3 +1356,78 @@ void x64_call_rabi(int* p_nints, int* p_nfloats, scf_3ac_code_t* c)
                *p_nfloats = nfloats;
 }
 
+/*
+ * Emit a "push" for each callee-saved register that is marked as used.
+ *
+ * Walks x64_abi_callee_saves[] from the first entry to the last.  An entry is
+ * pushed when some element of x64_registers[] has its 'used' flag set and a
+ * color that X64_COLOR_CONFLICT() reports as conflicting with the entry's
+ * 8-byte register.  Each push is appended to f->init_code->instructions and
+ * its length is added to f->init_code_bytes.
+ *
+ * c: not read by this function; the pushes always go to f->init_code.
+ * f: function whose init code receives the pushes.
+ *
+ * Returns 0 once the walk completes.  X64_INST_ADD_CHECK() is a macro defined
+ * elsewhere and may leave the function early (presumably with a negative
+ * error code -- confirm against its definition).
+ */
+int x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f)
+{
+       scf_x64_OpCode_t*  push_op = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G);
+       scf_instruction_t* inst;
+
+       int nb_regs = sizeof(x64_registers) / sizeof(x64_registers[0]);
+       int idx;
+
+       for (idx = 0; idx < X64_ABI_CALLEE_SAVES_NB; idx++) {
+
+               scf_register_t* saved  = x64_find_register_type_id_bytes(0, x64_abi_callee_saves[idx], 8);
+               int             in_use = 0;
+               int             k      = 0;
+
+               /* stop at the first used register whose color conflicts with 'saved' */
+               while (k < nb_regs && !in_use) {
+                       scf_register_t* other = &x64_registers[k++];
+
+                       if (other->used && X64_COLOR_CONFLICT(other->color, saved->color))
+                               in_use = 1;
+               }
+
+               if (!in_use)
+                       continue;
+
+               inst = x64_make_inst_G(push_op, saved);
+               X64_INST_ADD_CHECK(f->init_code->instructions, inst);
+
+               f->init_code_bytes += inst->len;
+       }
+
+       return 0;
+}
+
+/*
+ * Emit a "pop" for each callee-saved register that is marked as used, and
+ * record in f->callee_saved_size how many bytes those registers take.
+ *
+ * Walks x64_abi_callee_saves[] from the last entry to the first, i.e. in the
+ * reverse of the order used by x64_push_callee_regs().  An entry is popped
+ * when some element of x64_registers[] has its 'used' flag set and a color
+ * that X64_COLOR_CONFLICT() reports as conflicting with the entry's 8-byte
+ * register.
+ *
+ * c: code node that receives the pops; its inst_bytes and the code_bytes of
+ *    its basic block are both increased by the length of every pop added.
+ * f: function being finished; callee_saved_size is reset to 0 and then
+ *    increased by 8 for every register popped.
+ *
+ * Returns 0 once the walk completes.  X64_INST_ADD_CHECK() is a macro defined
+ * elsewhere and may leave the function early (presumably with a negative
+ * error code -- confirm against its definition).
+ */
+int x64_pop_callee_regs(scf_3ac_code_t* c, scf_function_t* f)
+{
+       scf_x64_OpCode_t*  pop  = x64_find_OpCode(SCF_X64_POP, 8, 8, SCF_X64_G);
+
+       scf_basic_block_t* bb   = c->basic_block;
+
+       scf_instruction_t* inst;
+       scf_register_t*    r2;
+       scf_register_t*    r;
+
+       int N = sizeof(x64_registers) / sizeof(x64_registers[0]);
+       int i;
+       int j;
+
+       f->callee_saved_size = 0;
+
+       for (i = X64_ABI_CALLEE_SAVES_NB - 1; i >= 0; i--) {
+
+               j  = x64_abi_callee_saves[i];
+               r  = x64_find_register_type_id_bytes(0, j, 8);
+
+               /* j is reused as the scan index; j < N afterwards means a conflict was found */
+               for (j = 0; j < N; j++) {
+                       r2 = &(x64_registers[j]);
+
+                       if (r2->used && X64_COLOR_CONFLICT(r2->color, r->color))
+                               break;
+               }
+
+               if (j < N) {
+                       inst = x64_make_inst_G(pop, r);
+                       X64_INST_ADD_CHECK(c->instructions, inst);
+
+                       /* keep the per-code byte count in step with the block's count,
+                        * as the caller does for the other instructions it appends */
+                       c ->inst_bytes       += inst->len;
+                       bb->code_bytes       += inst->len;
+                       f->callee_saved_size += 8;
+               }
+       }
+
+       return 0;
+}
+
index 7ac5dcdaabb0d99dd430b7667318b850d1bbb5a5..840018bcc8e485b8ab47115ef99b03b36bc965b7 100644 (file)
@@ -133,6 +133,9 @@ int                 x64_push_regs(scf_vector_t* instructions, uint32_t* regs, in
 int                 x64_pop_regs (scf_vector_t* instructions, scf_register_t** regs, int nb_regs, scf_register_t** updated_regs, int nb_updated);
 int                 x64_caller_save_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs, int stack_size, scf_register_t** saved_regs);
 
+int                 x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f);
+int                 x64_pop_callee_regs (scf_3ac_code_t* c, scf_function_t* f);
+
 int                 x64_save_reg  (scf_register_t* r, scf_3ac_code_t* c, scf_function_t* f);
 
 int                 x64_load_const(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f);