From 9f58ca78dc161da6a888e8275ba2678426c576e8 Mon Sep 17 00:00:00 2001 From: "yu.dongliang" <18588496441@163.com> Date: Thu, 22 Jun 2023 19:07:45 +0800 Subject: [PATCH] x64: callee don't save no-used register, align to 16 bytes because libc needs --- core/scf_function.h | 1 + lib/scf_object.o | Bin 7358 -> 7391 bytes native/risc/scf_risc_reg_arm64.c | 2 +- native/x64/scf_x64.c | 66 +++++++++++++------ native/x64/scf_x64_inst.c | 42 +++--------- native/x64/scf_x64_reg.c | 108 +++++++++++++++++++++++++++---- native/x64/scf_x64_reg.h | 3 + 7 files changed, 160 insertions(+), 62 deletions(-) diff --git a/core/scf_function.h b/core/scf_function.h index 7d6a654..744d07e 100644 --- a/core/scf_function.h +++ b/core/scf_function.h @@ -49,6 +49,7 @@ struct scf_function_s { scf_3ac_code_t* init_code; int init_code_bytes; + int callee_saved_size; int local_vars_size; int code_bytes; diff --git a/lib/scf_object.o b/lib/scf_object.o index 5172fccbdc80f9d35976b5dd367fbf0e2930e9c7..902ff7897c9b6a8cce015a9cd8a6bc5d6f73c275 100644 GIT binary patch delta 2733 zcmZ`*YfMx}6ux&Z%P#U*xC?t()S^~`qOclb!1~-Mbx0+GNEH-O%=)SiB1>NisBu$n zRw{Tjt!ZtsNgIEtX|##8R$6Uq?GIaP8quUy{b3@}T4Jn=(Yifn?yM{{9oRE-&UfZK zXJ)s1>W!%#U21vfp`wJ;Bt?wW&Ra!nRC1@!4U^Py>L{pIXQ_MOfI2H-d8(x{7jCIV z37@1{-0v`&6{#&y&1R{`-~iI|a1H5l@MyCVUe2<#l3=A)q%MR5S|R+V<=S7pL-OT& z<0hzEVPf3o)`db)Q^P|gZA-a%ul75oikr9jj(cCCVkI}5`?=ZF$HOK45ga5`ocq2t zgWHh{3rg~)acH2Qhl+VH!p)NY0lhbc9#I;BF}!^W+2HLnXo9~S!ywycYwdNZD3mtL zCuD=0SNSQKyTZp`;O05rK8%}HZQQKw;AXG^4RZ6$S~2~?g-ZV%TQV0j>lMg#rf_!oaTqh z+W6^c)l0`;D>E;YhL%u1q9NT5D_D$vKUI}azRbgqs5Rz(YBILPDw)M~mrHfnW6OXF zd!G6;Y_oggZM{1YfMfO*3BF3vE#FdbIYz51VT!}I%x87bSB-y5sEt%TGdLS*SWsL< zoVz|n8$8s6b$S$-6RPdo@q`jNMJ}ED&hk}fF`8o3AV%^fN0V*cn`ihng!i5j2Ps+QI8&am>C&np+j>z3EtU5Z!r zrpPoG18y4#c(KT6p|8?c8L7I7V7hK!Hxt+8|0+szc?EY z8B_9*SJfve$w)C(Waz)3z`J2xy%#p6X16}L!p7=7n#SCEKvP+kzDr|lxQ@$0U9+)l z9f7ad2>rAc$HwR%BWUPH*kt`{T;&KJl^M&#-5Q%jTPEqqJ-{uvPSdaAd1C3LGLgtj zI?B-aWc~F+$hul}bcy^tU3=robS;@&$1#^4VELq;uJ1%JhnB((#-y=4vRtU&J%ZAF z{i?8|UtvZ1Llji$d)Y|RFBQS%k~!S47Tr$QUO>NHh-H$(czrsX0PSh7 zWq7Xs4|)R*B3dGk)6l*1;iAi6+cXj1VfY7`xi-P9hx~NIGd+$xT0RqTj`~~!wTR}* z!%9{&V$cF;Pd8XuJf*@;#*@9QvGN>}ZmgvpcFNJSCm!WE2sZE&b#xF$W*BUQgKV&A z4roDBaq=dTt|vw}(E%U1+_oYVDV=9sa3o`ix(saYd1?ccxec`${BAG)Fxii^3w}U) z68=Vd5ps+LFk6kw(C8)%8N;F7FkrhtwIP_IjE6IZ;USlrBC8RjGY9@OjMfDLmHEe@ zHMrJeJVqaj*Q7kkcgdi(O8LO!VHxX`@}Y0XXO=u~;0>_!l`n8k6Q#BU@@JXy6CBMn zJhvrUEVb#Z(Rd8k5skxOXm7bC6i1krZb)aN2PW=3<#&&<*hxDpVi6!1@RvhfB*mh delta 2594 zcmZ`*eQZ-z6o2==b=?PByW7&&SCGksVd-_dA<1VN3!A0!e?oMHsOsOQ|?N-=nnckempcfRj^ zuRSzp*PMZ^?7G2i4W-2sEKQYzM;dszV$h2~9lMZ)p`FFqldy%wOBV-oCUH2z8cGMN za?(DS${W}UXyKT_O zn@(ks-l155p|SL*s((GoJyS5wsRbFUB|Z9X4%?I7o3k1oOU{6?@@SV%sK$=XVwQ!b z3)Dwa{Vd-zy;}03gHnAa`$JN7^kK3AshTk9*ODsHv}m!{s)KlKOWUOyUW=L2qbl8v zd-5SQyb5<1^i58kZFsV29D!_ds!Dg_E;El06vn*OYt1Icy|!f(^Z&9PUqpkFnIIO` zr~2C{j_PmK+gG{FSdi4=6ENh9rA(=642z}V$oKMXNEJQj#HqD{i6?%L>8$QOFf>ZD z$1|XAsmnlmgj<#ln~BQww`XQfLnEh@6&M~(`i`yDb}gr5#%)&LEKrHQOwV}lX*>ce z@XX!%BwG69?))J%U-Wy6`lLD2RwC5_xx2*FtcLaE>i5c%C#BkBX0Ot@Ii&R*GWtBI z&|R%~(W?FP*~hTLUu*N@>tKt&vnF?Rw|PswcAWMd?NV1w8E#bJea+cD3+C<%JiQ8L zlz1{{i*|S>{WMnUd>(baVHh1`rU$3*Rr*Rahq01* z=H{1`o@cnaQm6G?SN>&;bLIfjSu=EnC!tyn_xO&Tz)bo?RGMymMz3?5#smEaVN>bl ztPegZUC9DaS2o|(9jK2z)9+HR_jjsu-JLL4)(R8Kf)kD}!Pn_9#>pn911opzY~nCf zmsk6ZD>Rp1e8ttLv)TE%bmzP@egCPMZb`BWww2E@%^oSQsXW{Bjo^ez2Ip67h$YZD zPuG>?Im;?u3A-xRg_5XYk*+g(>jW&UygIaz^fNnfJz=odVWhGpyqWaZkp4xybwo$` zrohvUuOi2~Nw0jQwXE3&XU~)5>+VMUDdhOzTHk~BJlcigHZ69;u7FeZK-cQ7n=6Kc6PP zwrw5}Nt^MQ*kE%$Ng!J@-?x1*W)wQXuEY%D1C(wm7g2w-C0`Abv$|k=Vq~Bl9L*9Y!i5r&XA`(VoJu5}Oc< zl0l7V)n-z5xtVztTNmwY7$bBTDB2(;JO(O;rc2BoLcq) z#GQq#5}tJ&mpZj|ZN}MblVLZa(==LiHiS|g{Df$uMooJKHWI3%PHG8i1iey*@*jc~ zMxU2oHzxEcA^S5QhMXh4^rZjqhhfC%|9!J_=6VWM7Kb;ZRqh_tA+P67uxo6-heJ^( z_KQKK(V9_Pe_IfABW?f-Bj-*7n}1K%B@lNV_Zou^7?dsrQ5&p5bhSbCPXTHtYRt0I mFoNh#gXX70uL{wp@TKd(7jBdt*4%02EU1k|-?%9@$o~N(SIA@l diff --git a/native/risc/scf_risc_reg_arm64.c b/native/risc/scf_risc_reg_arm64.c index 3952609..2ed8758 100644 --- a/native/risc/scf_risc_reg_arm64.c +++ b/native/risc/scf_risc_reg_arm64.c @@ -715,7 +715,7 @@ int arm64_overflow_reg(scf_register_t* r, scf_3ac_code_t* c, scf_function_t* f) int arm64_overflow_reg2(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f) { scf_register_t* r2; - scf_dag_node_t* dn2; + scf_dag_node_t* dn2; int i; int j; diff --git a/native/x64/scf_x64.c b/native/x64/scf_x64.c index 38a83a2..2007c5c 100644 --- a/native/x64/scf_x64.c +++ b/native/x64/scf_x64.c @@ -200,13 +200,37 @@ static int _x64_function_finish(scf_function_t* f) scf_x64_OpCode_t* pop = x64_find_OpCode(SCF_X64_POP, 8,8, SCF_X64_G); scf_x64_OpCode_t* mov = x64_find_OpCode(SCF_X64_MOV, 4,4, SCF_X64_G2E); scf_x64_OpCode_t* sub = x64_find_OpCode(SCF_X64_SUB, 4,8, SCF_X64_I2E); + scf_x64_OpCode_t* ret = x64_find_OpCode(SCF_X64_RET, 8,8, SCF_X64_G); - scf_register_t* rsp = x64_find_register("rsp"); - scf_register_t* rbp = x64_find_register("rbp"); - scf_register_t* r; + scf_register_t* rsp = x64_find_register("rsp"); + scf_register_t* rbp = x64_find_register("rbp"); + scf_register_t* r; scf_instruction_t* inst = NULL; + scf_basic_block_t* bb; + scf_3ac_code_t* end; + scf_list_t* l; + + l = scf_list_tail(&f->basic_block_list_head); + bb = scf_list_data(l, scf_basic_block_t, list); + + l = scf_list_tail(&bb->code_list_head); + end = scf_list_data(l, scf_3ac_code_t, list); + + int err = x64_pop_callee_regs(end, f); + if (err < 0) + return err; + if (f->bp_used_flag) { + inst = x64_make_inst_G2E(mov, rsp, rbp); + X64_INST_ADD_CHECK(end->instructions, inst); + end->inst_bytes += inst->len; + bb ->code_bytes += inst->len; + + inst = x64_make_inst_G(pop, rbp); + X64_INST_ADD_CHECK(end->instructions, inst); + end->inst_bytes += inst->len; + bb ->code_bytes += inst->len; inst = x64_make_inst_G(push, rbp); X64_INST_ADD_CHECK(f->init_code->instructions, inst); @@ -217,30 +241,36 @@ static int _x64_function_finish(scf_function_t* f) f->init_code_bytes += inst->len; uint32_t local = f->local_vars_size; - if (!(local & 0xf)) - local += 8; + + if (f->callee_saved_size & 0xf) { + if (!(local & 0xf)) + local += 8; + } else { + if ((local & 0xf)) + local += 8; + } + + scf_logw("### local: %#x, local_vars_size: %#x, callee_saved_size: %#x\n", + local, f->local_vars_size, f->callee_saved_size); inst = x64_make_inst_I2E(sub, rsp, (uint8_t*)&local, 4); - //inst = x64_make_inst_I2E(sub, rsp, (uint8_t*)&f->local_vars_size, 4); X64_INST_ADD_CHECK(f->init_code->instructions, inst); f->init_code_bytes += inst->len; - int ret = _x64_save_rabi(f); - if (ret < 0) - return ret; + int err = _x64_save_rabi(f); + if (err < 0) + return err; } else f->init_code_bytes = 0; - int i; - for (i = 0; i < X64_ABI_CALLEE_SAVES_NB; i++) { - - r = x64_find_register_type_id_bytes(0, x64_abi_callee_saves[i], 8); + err = x64_push_callee_regs(f->init_code, f); + if (err < 0) + return err; - inst = x64_make_inst_G(push, r); - X64_INST_ADD_CHECK(f->init_code->instructions, inst); - - f->init_code_bytes += inst->len; - } + inst = x64_make_inst(ret, 8); + X64_INST_ADD_CHECK(end->instructions, inst); + end->inst_bytes += inst->len; + bb ->code_bytes += inst->len; x64_registers_clear(); return 0; diff --git a/native/x64/scf_x64_inst.c b/native/x64/scf_x64_inst.c index 3cb5e45..8e2f664 100644 --- a/native/x64/scf_x64_inst.c +++ b/native/x64/scf_x64_inst.c @@ -1582,32 +1582,6 @@ static int _x64_inst_end_handler(scf_native_t* ctx, scf_3ac_code_t* c) return -ENOMEM; } - scf_register_t* rsp = x64_find_register("rsp"); - scf_register_t* rbp = x64_find_register("rbp"); - scf_register_t* r; - - scf_x64_OpCode_t* pop = x64_find_OpCode(SCF_X64_POP, 8, 8, SCF_X64_G); - scf_x64_OpCode_t* mov = x64_find_OpCode(SCF_X64_MOV, 8, 8, SCF_X64_G2E); - scf_x64_OpCode_t* ret = x64_find_OpCode(SCF_X64_RET, 8, 8, SCF_X64_G); - scf_instruction_t* inst = NULL; - - int i; - for (i = X64_ABI_CALLEE_SAVES_NB - 1; i >= 0; i--) { - - r = x64_find_register_type_id_bytes(0, x64_abi_callee_saves[i], 8); - - inst = x64_make_inst_G(pop, r); - X64_INST_ADD_CHECK(c->instructions, inst); - } - - inst = x64_make_inst_G2E(mov, rsp, rbp); - X64_INST_ADD_CHECK(c->instructions, inst); - - inst = x64_make_inst_G(pop, rbp); - X64_INST_ADD_CHECK(c->instructions, inst); - - inst = x64_make_inst(ret, 8); - X64_INST_ADD_CHECK(c->instructions, inst); return 0; } @@ -1854,11 +1828,13 @@ static int _x64_inst_push_rax_handler(scf_native_t* ctx, scf_3ac_code_t* c) return -ENOMEM; } - scf_register_t* rax = x64_find_register("rax"); - scf_x64_OpCode_t* push; + scf_register_t* rax = x64_find_register("rax"); + scf_x64_OpCode_t* push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G); scf_instruction_t* inst; - push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G); + inst = x64_make_inst_G(push, rax); + X64_INST_ADD_CHECK(c->instructions, inst); + inst = x64_make_inst_G(push, rax); X64_INST_ADD_CHECK(c->instructions, inst); return 0; @@ -1872,11 +1848,13 @@ static int _x64_inst_pop_rax_handler(scf_native_t* ctx, scf_3ac_code_t* c) return -ENOMEM; } - scf_register_t* rax = x64_find_register("rax"); - scf_x64_OpCode_t* pop; + scf_register_t* rax = x64_find_register("rax"); + scf_x64_OpCode_t* pop = x64_find_OpCode(SCF_X64_POP, 8,8, SCF_X64_G); scf_instruction_t* inst; - pop = x64_find_OpCode(SCF_X64_POP, 8,8, SCF_X64_G); + inst = x64_make_inst_G(pop, rax); + X64_INST_ADD_CHECK(c->instructions, inst); + inst = x64_make_inst_G(pop, rax); X64_INST_ADD_CHECK(c->instructions, inst); return 0; diff --git a/native/x64/scf_x64_reg.c b/native/x64/scf_x64_reg.c index 0339d3f..ad45485 100644 --- a/native/x64/scf_x64_reg.c +++ b/native/x64/scf_x64_reg.c @@ -149,6 +149,8 @@ int x64_registers_init() r->dag_nodes = scf_vector_alloc(); if (!r->dag_nodes) return -ENOMEM; + + r->used = 0; } return 0; @@ -168,6 +170,8 @@ void x64_registers_clear() scf_vector_free(r->dag_nodes); r->dag_nodes = NULL; } + + r->used = 0; } } @@ -175,12 +179,12 @@ int x64_caller_save_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs { int i; int j; - scf_register_t* r; - scf_register_t* r2; - scf_instruction_t* inst; - scf_register_t* rsp = x64_find_register("rsp"); + scf_register_t* r; + scf_register_t* r2; + scf_register_t* rsp = x64_find_register("rsp"); scf_x64_OpCode_t* mov = x64_find_OpCode(SCF_X64_MOV, 8,8, SCF_X64_G2E); scf_x64_OpCode_t* push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G); + scf_instruction_t* inst; int size = 0; int k = 0; @@ -574,13 +578,14 @@ int x64_overflow_reg(scf_register_t* r, scf_3ac_code_t* c, scf_function_t* f) } } + r->used = 1; return 0; } int x64_overflow_reg2(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f) { scf_register_t* r2; - scf_dag_node_t* dn2; + scf_dag_node_t* dn2; int i; int j; @@ -609,6 +614,7 @@ int x64_overflow_reg2(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, } } + r->used = 1; return 0; } @@ -669,6 +675,7 @@ static int _x64_overflow_reg3(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_cod } } + r->used = 1; return 0; } @@ -849,7 +856,7 @@ scf_register_t* x64_select_overflowed_reg(scf_dag_node_t* dn, scf_3ac_code_t* c) return NULL; } -int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f) +int x64_load_const(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f) { scf_instruction_t* inst; scf_x64_OpCode_t* lea; @@ -857,6 +864,7 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_variable_t* v; v = dn->var; + r->used = 1; int size = x64_variable_size(v); int is_float = scf_variable_float(v); @@ -873,7 +881,7 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_rela_t* rela = NULL; lea = x64_find_OpCode(SCF_X64_LEA, size, size, SCF_X64_E2G); - inst = x64_make_inst_M2G(&rela, lea, rabi, NULL, v); + inst = x64_make_inst_M2G(&rela, lea, r, NULL, v); X64_INST_ADD_CHECK(c->instructions, inst); X64_RELA_ADD_CHECK(f->text_relas, rela, c, NULL, v->func_ptr); @@ -881,7 +889,7 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_x64_OpCode_t* xor; xor = x64_find_OpCode(SCF_X64_XOR, size, size, SCF_X64_G2E); - inst = x64_make_inst_G2E(xor, rabi, rabi); + inst = x64_make_inst_G2E(xor, r, r); X64_INST_ADD_CHECK(c->instructions, inst); } @@ -895,7 +903,7 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c, lea = x64_find_OpCode(SCF_X64_LEA, size, size, SCF_X64_E2G); - inst = x64_make_inst_M2G(&rela, lea, rabi, NULL, v); + inst = x64_make_inst_M2G(&rela, lea, r, NULL, v); X64_INST_ADD_CHECK(c->instructions, inst); X64_RELA_ADD_CHECK(f->data_relas, rela, c, v, NULL); @@ -906,13 +914,13 @@ int x64_load_const(scf_register_t* rabi, scf_dag_node_t* dn, scf_3ac_code_t* c, lea = x64_find_OpCode(SCF_X64_LEA, size, size, SCF_X64_E2G); - inst = x64_make_inst_M2G(&rela, lea, rabi, NULL, v); + inst = x64_make_inst_M2G(&rela, lea, r, NULL, v); X64_INST_ADD_CHECK(c->instructions, inst); X64_RELA_ADD_CHECK(f->data_relas, rela, c, v, NULL); } else { mov = x64_find_OpCode(SCF_X64_MOV, size, size, SCF_X64_I2G); - inst = x64_make_inst_I2G(mov, rabi, (uint8_t*)&v->data, size); + inst = x64_make_inst_I2G(mov, r, (uint8_t*)&v->data, size); X64_INST_ADD_CHECK(c->instructions, inst); } @@ -931,6 +939,8 @@ int x64_load_reg(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_f int is_float = scf_variable_float(dn->var); int var_size = x64_variable_size(dn->var); + r->used = 1; + if (!is_float) { if (scf_variable_const(dn->var)) { @@ -1047,6 +1057,7 @@ int x64_select_reg(scf_register_t** preg, scf_dag_node_t* dn, scf_3ac_code_t* c, } else dn->loaded = 1; + r->used = 1; *preg = r; return 0; } @@ -1345,3 +1356,78 @@ void x64_call_rabi(int* p_nints, int* p_nfloats, scf_3ac_code_t* c) *p_nfloats = nfloats; } +int x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f) +{ + scf_x64_OpCode_t* push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G); + + scf_instruction_t* inst; + scf_register_t* r2; + scf_register_t* r; + + int N = sizeof(x64_registers) / sizeof(x64_registers[0]); + int i; + int j; + + for (i = 0; i < X64_ABI_CALLEE_SAVES_NB; i++) { + + j = x64_abi_callee_saves[i]; + r = x64_find_register_type_id_bytes(0, j, 8); + + for (j = 0; j < N; j++) { + r2 = &(x64_registers[j]); + + if (r2->used && X64_COLOR_CONFLICT(r2->color, r->color)) + break; + } + + if (j < N) { + inst = x64_make_inst_G(push, r); + X64_INST_ADD_CHECK(f->init_code->instructions, inst); + + f->init_code_bytes += inst->len; + } + } + + return 0; +} + +int x64_pop_callee_regs(scf_3ac_code_t* c, scf_function_t* f) +{ + scf_x64_OpCode_t* pop = x64_find_OpCode(SCF_X64_POP, 8, 8, SCF_X64_G); + + scf_basic_block_t* bb = c->basic_block; + + scf_instruction_t* inst; + scf_register_t* r2; + scf_register_t* r; + + int N = sizeof(x64_registers) / sizeof(x64_registers[0]); + int i; + int j; + + f->callee_saved_size = 0; + + for (i = X64_ABI_CALLEE_SAVES_NB - 1; i >= 0; i--) { + + j = x64_abi_callee_saves[i]; + r = x64_find_register_type_id_bytes(0, j, 8); + + for (j = 0; j < N; j++) { + r2 = &(x64_registers[j]); + + if (r2->used && X64_COLOR_CONFLICT(r2->color, r->color)) + break; + } + + if (j < N) { + inst = x64_make_inst_G(pop, r); + X64_INST_ADD_CHECK(c->instructions, inst); + + bb->code_bytes += inst->len; + f->callee_saved_size += 8; + } + } + + return 0; +} + diff --git a/native/x64/scf_x64_reg.h b/native/x64/scf_x64_reg.h index 7ac5dcd..840018b 100644 --- a/native/x64/scf_x64_reg.h +++ b/native/x64/scf_x64_reg.h @@ -133,6 +133,9 @@ int x64_push_regs(scf_vector_t* instructions, uint32_t* regs, in int x64_pop_regs (scf_vector_t* instructions, scf_register_t** regs, int nb_regs, scf_register_t** updated_regs, int nb_updated); int x64_caller_save_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs, int stack_size, scf_register_t** saved_regs); +int x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f); +int x64_pop_callee_regs (scf_3ac_code_t* c, scf_function_t* f); + int x64_save_reg (scf_register_t* r, scf_3ac_code_t* c, scf_function_t* f); int x64_load_const(scf_register_t* r, scf_dag_node_t* dn, scf_3ac_code_t* c, scf_function_t* f); -- 2.25.1