From 0026644e8ea80f1b5e9942d6d24da8e79a90b08c Mon Sep 17 00:00:00 2001 From: "yu.dongliang" <18588496441@163.com> Date: Sun, 14 Jun 2026 19:09:00 +0800 Subject: [PATCH] 1, fix: stack memory layout error in X64, 2, fix: x64 peephole: when a register is changed, memory data based on it becomes invalid. --- core/scf_optimizer.c | 3 + examples/mat.c | 74 +--------------- examples/mat2.c | 156 ++++++++++++++++++++++++++++++++++ native/x64/scf_x64.c | 70 ++++++++------- native/x64/scf_x64_inst.c | 6 +- native/x64/scf_x64_opcode.c | 3 +- native/x64/scf_x64_peephole.c | 153 +++++++++++++-------------------- native/x64/scf_x64_reg.c | 51 ++++++++++- native/x64/scf_x64_reg.h | 4 +- parse/Makefile | 2 +- 10 files changed, 319 insertions(+), 203 deletions(-) create mode 100644 examples/mat2.c diff --git a/core/scf_optimizer.c b/core/scf_optimizer.c index 80099ab..d69fa20 100644 --- a/core/scf_optimizer.c +++ b/core/scf_optimizer.c @@ -109,6 +109,9 @@ int scf_optimize(scf_ast_t* ast, scf_vector_t* functions) if (!f->node.define_flag) continue; + if (strcmp(f->node.w->text->data, "main")) + continue; + printf("\n"); scf_logi("------- %s() ------\n", f->node.w->text->data); diff --git a/examples/mat.c b/examples/mat.c index 2db49a0..0395a17 100644 --- a/examples/mat.c +++ b/examples/mat.c @@ -66,64 +66,6 @@ struct mat return 0; } - mat*, int operator+(mat* this, mat* that) - { - mat* res; - - res = new mat(MAT_TYPE_DOUBLE, NULL, this->depth, this->width, this->height, this->count); - if (!res) - return NULL, -1; - - int64_t c; - int64_t y; - int64_t x; - int64_t z; - - double* d0 = (double*)this->data; - double* d1 = (double*)that->data; - double* d2 = (double*)res->data; - - for (c = 0; c < this->count; c++) { - - int64_t c0 = this->c + this->cstep * c; - int64_t c1 = that->c + that->cstep * c; - - int64_t coffset0 = c0 * this->height * this->xstride; - int64_t coffset1 = c1 * that->height * that->xstride; - int64_t coffset2 = c * res->height * res->xstride; - - for (y = 0; y < 
this->height; y++) { - - int64_t y0 = this->y + this->ystep * y; - int64_t y1 = that->y + that->ystep * y; - - int64_t yoffset0 = y0 * this->xstride * this->depth; - int64_t yoffset1 = y1 * that->xstride * that->depth; - int64_t yoffset2 = y * res->xstride * res->depth; - - for (x = 0; x < this->width; x++) { - - int64_t x0 = this->x + this->xstep * x; - int64_t x1 = that->x + that->xstep * x; - - int64_t xoffset0 = x0 * this->depth; - int64_t xoffset1 = x1 * that->depth; - int64_t xoffset2 = x * res->depth; - - for (z = 0; z < this->depth; z++) { - - int64_t z0 = this->z + this->zstep * z; - int64_t z1 = that->z + that->zstep * z; - - d2[coffset2 + yoffset2 + xoffset2 + z] = d1[coffset1 + yoffset1 + xoffset1 + z1] + d0[coffset0 + yoffset0 + xoffset0 + z0]; - } - } - } - } - - return res, 0; - } - void __release(mat* this) { if (this->data) @@ -135,22 +77,10 @@ struct mat int main() { double a[4] = {1, 2, 3, 4}; - double b[4] = {5, 6, 7, 8}; - double c[4] = {9, 10, 11, 12}; - - mat* m0; - mat* m1; - mat* m2; - mat* m3; - mat* m4; - - m0 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)a, 1, 2, 2, 1); - m1 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)b, 1, 2, 2, 1); - m2 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)c, 1, 2, 2, 1); - m3 = m0 + m1 + m2; + mat* m0 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)a, 1, 2, 2, 1); - double* dd = (double*)m3->data; + double* dd = (double*)m0->data; int i; for (i = 0; i < 4; i++) diff --git a/examples/mat2.c b/examples/mat2.c new file mode 100644 index 0000000..62922ef --- /dev/null +++ b/examples/mat2.c @@ -0,0 +1,156 @@ + +#include"../lib/scf_capi.c" + +const int MAT_TYPE_NONE = 0; +const int MAT_TYPE_U8 = 1; +const int MAT_TYPE_DOUBLE = 2; + +struct mat; + +struct mat +{ + uint8_t* data; + + int64_t z; + int64_t x; + int64_t y; + int64_t c; + + int64_t depth; + int64_t width; + int64_t height; + int64_t count; + + int64_t xstep; + int64_t ystep; + int64_t zstep; + int64_t cstep; + + int64_t xstride; + int type; + + int __init(mat* this, int type, uint8_t* 
data, int depth, int width, int height, int count) + { + this->z = 0; + this->x = 0; + this->y = 0; + this->c = 0; + + this->depth = depth; + this->width = width; + this->height = height; + this->count = count; + + this->xstep = 1; + this->ystep = 1; + this->zstep = 1; + this->cstep = 1; + this->type = type; + + this->xstride = this->width; + int64_t size = this->depth * this->width * this->height * this->count; + + if (MAT_TYPE_U8 == type) { + + } else if (MAT_TYPE_DOUBLE == type) { + size *= sizeof(double); + } else + return -1; + + this->data = scf__auto_malloc(size); + if (!this->data) + return -1; + + if (data) + memcpy(this->data, data, size); + return 0; + } + + mat*, int operator+(mat* this, mat* that) + { + mat* res; + + res = new mat(MAT_TYPE_DOUBLE, NULL, this->depth, this->width, this->height, this->count); + if (!res) + return NULL, -1; + + int64_t c; + int64_t y; + int64_t x; + int64_t z; + + double* d0 = (double*)this->data; + double* d1 = (double*)that->data; + double* d2 = (double*)res->data; + + for (c = 0; c < this->count; c++) { + + int64_t c0 = this->c + this->cstep * c; + int64_t c1 = that->c + that->cstep * c; + + int64_t coffset0 = c0 * this->height * this->xstride; + int64_t coffset1 = c1 * that->height * that->xstride; + int64_t coffset2 = c * res->height * res->xstride; + + for (y = 0; y < this->height; y++) { + + int64_t y0 = this->y + this->ystep * y; + int64_t y1 = that->y + that->ystep * y; + + int64_t yoffset0 = y0 * this->xstride * this->depth; + int64_t yoffset1 = y1 * that->xstride * that->depth; + int64_t yoffset2 = y * res->xstride * res->depth; + + for (x = 0; x < this->width; x++) { + + int64_t x0 = this->x + this->xstep * x; + int64_t x1 = that->x + that->xstep * x; + + int64_t xoffset0 = x0 * this->depth; + int64_t xoffset1 = x1 * that->depth; + int64_t xoffset2 = x * res->depth; + + for (z = 0; z < this->depth; z++) { + + int64_t z0 = this->z + this->zstep * z; + int64_t z1 = that->z + that->zstep * z; + + d2[coffset2 + 
yoffset2 + xoffset2 + z] = d1[coffset1 + yoffset1 + xoffset1 + z1] + d0[coffset0 + yoffset0 + xoffset0 + z0]; + } + } + } + } + + return res, 0; + } + + void __release(mat* this) + { + if (this->data) + scf__auto_freep(&this->data, NULL); + } +}; + + +int main() +{ + double a[4] = {1, 2, 3, 4}; + double b[4] = {5, 6, 7, 8}; + + mat* m0; + mat* m1; + mat* m2; + + m0 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)b, 1, 2, 2, 1); + m1 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)a, 1, 2, 2, 1); + + m2 = m0 + m1; + + double* dd = (double*)m2->data; + + int i; + for (i = 0; i < 4; i++) + printf("m2: %lf\n", dd[i]); + + return 0; +} diff --git a/native/x64/scf_x64.c b/native/x64/scf_x64.c index 7ec69c3..300b9dc 100644 --- a/native/x64/scf_x64.c +++ b/native/x64/scf_x64.c @@ -52,7 +52,6 @@ static void _x64_argv_rabi(scf_function_t* f) int size = x64_variable_size(v); if (is_float) { - if (f->args_float < X64_ABI_FLOAT_NB) { v->rabi = x64_find_register_type_id_bytes(is_float, x64_abi_float_regs[f->args_float], size); @@ -61,6 +60,7 @@ static void _x64_argv_rabi(scf_function_t* f) f->args_float++; continue; } + } else if (f->args_int < X64_ABI_NB) { v->rabi = x64_find_register_type_id_bytes(is_float, x64_abi_regs[f->args_int], size); @@ -79,12 +79,12 @@ static void _x64_argv_rabi(scf_function_t* f) static int _x64_function_init(scf_function_t* f, scf_vector_t* local_vars) { scf_variable_t* v; + int i; int ret = x64_registers_init(); if (ret < 0) return ret; - int i; for (i = 0; i < local_vars->size; i++) { v = local_vars->data[i]; @@ -109,11 +109,13 @@ static int _x64_function_init(scf_function_t* f, scf_vector_t* local_vars) local_vars_size += size; - if (local_vars_size & 0x7) - local_vars_size = (local_vars_size + 7) >> 3 << 3; + if (v->nb_dimentions > 0) + local_vars_size = (local_vars_size + 0xf) & ~0xf; + else + local_vars_size = (local_vars_size + 0x7) & ~0x7; - v->bp_offset = -local_vars_size; - v->local_flag = 1; + v->bp_offset = -local_vars_size; + v->local_flag = 1; } return 
local_vars_size; @@ -136,9 +138,12 @@ static int _x64_save_rabi(scf_function_t* f) scf_register_t* xmm1; scf_register_t* xmm2; scf_register_t* xmm3; + scf_register_t* xmm4; + scf_register_t* xmm5; + scf_register_t* xmm6; + scf_register_t* xmm7; if (f->vargs_flag) { - inst = NULL; mov = x64_find_OpCode(SCF_X64_MOV, 8,8, SCF_X64_G2E); @@ -171,11 +176,19 @@ static int _x64_save_rabi(scf_function_t* f) xmm1 = x64_find_register("xmm1"); xmm2 = x64_find_register("xmm2"); xmm3 = x64_find_register("xmm3"); - - X64_SAVE_RABI(-56, xmm0); - X64_SAVE_RABI(-64, xmm1); - X64_SAVE_RABI(-72, xmm2); - X64_SAVE_RABI(-80, xmm3); + xmm4 = x64_find_register("xmm4"); + xmm5 = x64_find_register("xmm5"); + xmm6 = x64_find_register("xmm6"); + xmm7 = x64_find_register("xmm7"); + + X64_SAVE_RABI(-56, xmm0); + X64_SAVE_RABI(-64, xmm1); + X64_SAVE_RABI(-72, xmm2); + X64_SAVE_RABI(-80, xmm3); + X64_SAVE_RABI(-88, xmm4); + X64_SAVE_RABI(-96, xmm5); + X64_SAVE_RABI(-104, xmm6); + X64_SAVE_RABI(-112, xmm7); } return 0; @@ -217,6 +230,10 @@ static int _x64_function_finish(scf_function_t* f) l = scf_list_tail(&bb->code_list_head); end = scf_list_data(l, scf_3ac_code_t, list); + int err = x64_pop_callee_regs(end, f); + if (err < 0) + return err; + if (f->bp_used_flag || f->vla_flag || f->call_flag) { inst = x64_make_inst_G2E(mov, rsp, rbp); @@ -230,16 +247,8 @@ static int _x64_function_finish(scf_function_t* f) bb ->code_bytes += inst->len; } - int err = x64_pop_callee_regs(end, f); - if (err < 0) - return err; - f->init_code_bytes = 0; - err = x64_push_callee_regs(f->init_code, f); - if (err < 0) - return err; - uint32_t local = f->bp_used_flag ? 
f->local_vars_size : 0; if (f->bp_used_flag || f->vla_flag || f->call_flag) { @@ -252,13 +261,8 @@ static int _x64_function_finish(scf_function_t* f) X64_INST_ADD_CHECK(f->init_code->instructions, inst, NULL); f->init_code_bytes += inst->len; - if (f->callee_saved_size & 0xf) { - if (!(local & 0xf)) - local += 8; - } else { - if ((local & 0xf)) - local += 8; - } + if ((f->callee_saved_size + local) & 0xf) + local += 8; scf_logd("### local: %#x, local_vars_size: %#x, callee_saved_size: %#x\n", local, f->local_vars_size, f->callee_saved_size); @@ -267,11 +271,15 @@ static int _x64_function_finish(scf_function_t* f) X64_INST_ADD_CHECK(f->init_code->instructions, inst, NULL); f->init_code_bytes += inst->len; - int err = _x64_save_rabi(f); + err = _x64_save_rabi(f); if (err < 0) return err; } + err = x64_push_callee_regs(f->init_code, f); + if (err < 0) + return err; + inst = x64_make_inst(ret, 8); X64_INST_ADD_CHECK(end->instructions, inst, NULL); end->inst_bytes += inst->len; @@ -1120,11 +1128,13 @@ int scf_x64_select_inst(scf_native_t* ctx, scf_function_t* f) scf_logi("---------- %s() ------------\n", f->node.w->text->data); + scf_variable_t* v; int i; + for (i = 0; i < local_vars->size; i++) { - scf_variable_t* v = local_vars->data[i]; - assert(v->w); + v = local_vars->data[i]; + assert(v->w); scf_logd("v: %p, name: %s_%d_%d, size: %d, bp_offset: %d, arg_flag: %d\n", v, v->w->text->data, v->w->line, v->w->pos, scf_variable_size(v), v->bp_offset, v->arg_flag); diff --git a/native/x64/scf_x64_inst.c b/native/x64/scf_x64_inst.c index 51c6b9b..9921814 100644 --- a/native/x64/scf_x64_inst.c +++ b/native/x64/scf_x64_inst.c @@ -528,8 +528,10 @@ static int _x64_inst_call_handler(scf_native_t* ctx, scf_3ac_code_t* c) X64_INST_ADD_CHECK(c->instructions, inst, NULL); scf_register_t* saved_regs[X64_ABI_CALLER_SAVES_NB]; + scf_register_t* drop_regs [X64_ABI_CALLER_SAVES_NB]; + int n_drops = 0; - int save_size = x64_caller_save_regs(c, x64_abi_caller_saves, 
X64_ABI_CALLER_SAVES_NB, stack_size, saved_regs); + int save_size = x64_caller_save_regs(c, x64_abi_caller_saves, X64_ABI_CALLER_SAVES_NB, stack_size, saved_regs, drop_regs, &n_drops); if (save_size < 0) { scf_loge("\n"); return save_size; @@ -593,6 +595,8 @@ static int _x64_inst_call_handler(scf_native_t* ctx, scf_3ac_code_t* c) X64_INST_ADD_CHECK(c->instructions, inst, NULL); } + x64_drop_regs(drop_regs, n_drops); + int nb_updated = 0; scf_register_t* updated_regs[X64_ABI_RET_NB * 2]; diff --git a/native/x64/scf_x64_opcode.c b/native/x64/scf_x64_opcode.c index 95092c4..f67b5d2 100644 --- a/native/x64/scf_x64_opcode.c +++ b/native/x64/scf_x64_opcode.c @@ -1,7 +1,6 @@ #include"scf_x64.h" -scf_x64_OpCode_t x64_OpCodes[] = -{ +scf_x64_OpCode_t x64_OpCodes[] = { {SCF_X64_PUSH, "push", 1, {0x50, 0x0, 0x0},1, 8,8, SCF_X64_G, 0,0, 0,{0,0}}, {SCF_X64_POP, "pop", 1, {0x58, 0x0, 0x0},1, 8,8, SCF_X64_G, 0,0, 0,{0,0}}, diff --git a/native/x64/scf_x64_peephole.c b/native/x64/scf_x64_peephole.c index 57f906c..147a303 100644 --- a/native/x64/scf_x64_peephole.c +++ b/native/x64/scf_x64_peephole.c @@ -3,18 +3,20 @@ #include"scf_basic_block.h" #include"scf_3ac.h" -static int _x64_peephole_mov(scf_vector_t* std_insts, scf_instruction_t* inst) +static int _x64_peephole_mov(scf_vector_t* save_insts, scf_vector_t* peep_insts, scf_instruction_t* inst) { - scf_3ac_code_t* c = inst->c; - scf_basic_block_t* bb = c->basic_block; + scf_3ac_code_t* c = inst->c; + scf_basic_block_t* bb = c->basic_block; - scf_instruction_t* inst2; - scf_instruction_t* std; - scf_x64_OpCode_t* OpCode; + scf_register_t* r0; + scf_register_t* r1; + scf_x64_OpCode_t* OpCode; + scf_instruction_t* inst2; + scf_instruction_t* std; int j; - for (j = std_insts->size - 1; j >= 0; j--) { - std = std_insts->data[j]; + for (j = peep_insts->size - 1; j >= 0; j--) { + std = peep_insts->data[j]; #if 0 scf_loge("std j: %d\n", j); scf_3ac_code_print(std->c, NULL); @@ -67,12 +69,13 @@ static int _x64_peephole_mov(scf_vector_t* 
std_insts, scf_instruction_t* inst) return X64_PEEPHOLE_DEL; } - assert(0 == scf_vector_del(std_insts, std)); + assert(0 == scf_vector_del(peep_insts, std)); if (std->nb_used > 0) continue; assert(0 == scf_vector_del(std->c->instructions, std)); + assert(0 == scf_vector_del(save_insts, std)); free(std); std = NULL; @@ -175,20 +178,16 @@ static int _x64_peephole_mov(scf_vector_t* std_insts, scf_instruction_t* inst) } } else if (scf_inst_data_same(&std->src, &inst->dst)) { - assert(0 == scf_vector_del(std_insts, std)); + assert(0 == scf_vector_del(peep_insts, std)); } else if (x64_inst_data_is_reg(&std->src)) { - scf_register_t* r0; - scf_register_t* r1; - if (x64_inst_data_is_reg(&inst->dst)) { - r0 = std ->src.base; r1 = inst->dst.base; if (X64_COLOR_CONFLICT(r0->color, r1->color)) - assert(0 == scf_vector_del(std_insts, std)); + assert(0 == scf_vector_del(peep_insts, std)); } } else if (x64_inst_data_is_reg(&std->dst)) { @@ -201,33 +200,41 @@ static int _x64_peephole_mov(scf_vector_t* std_insts, scf_instruction_t* inst) } } - assert(0 == scf_vector_add_unique(std_insts, inst)); + if (x64_inst_data_is_reg(&inst->dst)) { + r1 = inst->dst.base; + + for (j = peep_insts->size - 1; j >= 0; j--) { + std = peep_insts->data[j]; + + if ((std->src.flag && (std->src.base == r1 || std->src.index == r1)) + || (std->dst.flag && (std->dst.base == r1 || std->dst.index == r1))) + assert(0 == scf_vector_del(peep_insts, std)); + } + } + + assert(0 == scf_vector_add_unique(peep_insts, inst)); return 0; } -static int _x64_peephole_cmp(scf_vector_t* std_insts, scf_instruction_t* inst) +static int _x64_peephole_cmp(scf_vector_t* save_insts, scf_vector_t* peep_insts, scf_instruction_t* inst) { - scf_3ac_code_t* c = inst->c; - scf_basic_block_t* bb = c->basic_block; - - scf_instruction_t* inst2; - scf_instruction_t* std; + scf_3ac_code_t* c = inst->c; + scf_basic_block_t* bb = c->basic_block; + scf_instruction_t* inst2; + scf_instruction_t* std; int j; - for (j = std_insts->size - 1; j >= 
0; j--) { - std = std_insts->data[j]; + for (j = peep_insts->size - 1; j >= 0; j--) { + std = peep_insts->data[j]; if (SCF_X64_LEA == std->OpCode->type) break; if (inst->src.flag) { - if (scf_inst_data_same(&inst->src, &std->src)) - inst->src.base = std->dst.base; else if (scf_inst_data_same(&inst->src, &std->dst)) - inst->src.base = std->src.base; else goto check; @@ -244,11 +251,9 @@ static int _x64_peephole_cmp(scf_vector_t* std_insts, scf_instruction_t* inst) } else if (inst->dst.flag) { if (scf_inst_data_same(&inst->dst, &std->src)) - inst->dst.base = std->dst.base; else if (scf_inst_data_same(&inst->dst, &std->dst)) - inst->dst.base = std->src.base; else goto check; @@ -295,42 +300,6 @@ check: return 0; } -static int _x64_peephole_movx(scf_vector_t* std_insts, scf_instruction_t* inst) -{ - if (!x64_inst_data_is_reg(&inst->src) || !x64_inst_data_is_reg(&inst->dst)) { - scf_vector_clear(std_insts, NULL); - return 0; - } - - scf_3ac_code_t* c = inst->c; - scf_basic_block_t* bb = c->basic_block; - scf_instruction_t* std; - scf_x64_OpCode_t* OpCode; - int j; - - for (j = std_insts->size - 1; j >= 0; j--) { - std = std_insts->data[j]; - - if (scf_inst_data_same(&std->dst, &inst->src)) { - std->nb_used++; - - if (std->OpCode == inst->OpCode - && scf_inst_data_same(&std->src, &inst->src) - && scf_inst_data_same(&std->dst, &inst->dst)) { - - assert(0 == scf_vector_del(inst->c->instructions, inst)); - - free(inst); - inst = NULL; - return X64_PEEPHOLE_DEL; - } - } - } - - assert(0 == scf_vector_add_unique(std_insts, inst)); - return 0; -} - static int x64_inst_is_useful(scf_instruction_t* inst, scf_instruction_t* std) { if (scf_inst_data_same(&inst->dst, &std->src)) @@ -478,15 +447,15 @@ static int __x64_inst_useful_bb_next(scf_basic_block_t* bb, void* data, scf_vect return 0; } -static int _x64_peephole_function(scf_vector_t* tmp_insts, scf_function_t* f) +static int _x64_peephole_function(scf_vector_t* save_insts, scf_function_t* f) { scf_instruction_t* inst; 
scf_basic_block_t* bb; scf_3ac_code_t* c; int i; - for (i = tmp_insts->size - 1; i >= 0; i--) { - inst = tmp_insts->data[i]; + for (i = save_insts->size - 1; i >= 0; i--) { + inst = save_insts->data[i]; if (SCF_X64_MOV != inst->OpCode->type) continue; @@ -510,7 +479,7 @@ static int _x64_peephole_function(scf_vector_t* tmp_insts, scf_function_t* f) continue; assert(0 == scf_vector_del(c->instructions, inst)); - assert(0 == scf_vector_del(tmp_insts, inst)); + assert(0 == scf_vector_del(save_insts, inst)); free(inst); inst = NULL; @@ -518,8 +487,8 @@ static int _x64_peephole_function(scf_vector_t* tmp_insts, scf_function_t* f) int n_locals = 0; - for (i = 0; i < tmp_insts->size; i++) { - inst = tmp_insts->data[i]; + for (i = 0; i < save_insts->size; i++) { + inst = save_insts->data[i]; if (x64_inst_data_is_local(&inst->src) || x64_inst_data_is_local(&inst->dst)) n_locals++; @@ -536,7 +505,6 @@ static int _x64_peephole_function(scf_vector_t* tmp_insts, scf_function_t* f) int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f) { - scf_instruction_t* std; scf_instruction_t* inst; scf_basic_block_t* bb; scf_3ac_operand_t* dst; @@ -544,16 +512,16 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f) scf_list_t* l; scf_list_t* l2; - scf_vector_t* std_insts; - scf_vector_t* tmp_insts; // instructions for register or local variable + scf_vector_t* peep_insts; + scf_vector_t* save_insts; - std_insts = scf_vector_alloc(); - if (!std_insts) + peep_insts = scf_vector_alloc(); + if (!peep_insts) return -ENOMEM; - tmp_insts = scf_vector_alloc(); - if (!tmp_insts) { - scf_vector_free(tmp_insts); + save_insts = scf_vector_alloc(); + if (!save_insts) { + scf_vector_free(save_insts); return -ENOMEM; } @@ -569,7 +537,7 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f) bb = scf_list_data(l, scf_basic_block_t, list); if (bb->jmp_flag) { - scf_vector_clear(std_insts, NULL); + scf_vector_clear(peep_insts, NULL); l2 = scf_list_head(&bb->code_list_head); 
c = scf_list_data(l2, scf_3ac_code_t, list); @@ -582,9 +550,8 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f) continue; } - if (bb->jmp_dst_flag) { - scf_vector_clear(std_insts, NULL); - } + if (bb->jmp_dst_flag) + scf_vector_clear(peep_insts, NULL); for (l2 = scf_list_head(&bb->code_list_head); l2 != scf_list_sentinel(&bb->code_list_head); l2 = scf_list_next(l2)) { @@ -603,26 +570,26 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f) // scf_instruction_print(inst); ret = 0; - switch (inst->OpCode->type) { - + switch (inst->OpCode->type) + { case SCF_X64_CMP: case SCF_X64_TEST: - ret = _x64_peephole_cmp(std_insts, inst); + ret = _x64_peephole_cmp(save_insts, peep_insts, inst); break; case SCF_X64_MOV: - ret = _x64_peephole_mov(std_insts, inst); + ret = _x64_peephole_mov(save_insts, peep_insts, inst); break; case SCF_X64_LEA: - ret = scf_vector_add_unique(std_insts, inst); + ret = scf_vector_add_unique(peep_insts, inst); break; case SCF_X64_MOVSS: case SCF_X64_MOVSD: break; default: - scf_vector_clear(std_insts, NULL); + scf_vector_clear(peep_insts, NULL); break; }; @@ -632,7 +599,7 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f) if (X64_PEEPHOLE_DEL == ret) continue; - ret = scf_vector_add(tmp_insts, inst); + ret = scf_vector_add(save_insts, inst); if (ret < 0) goto error; i++; @@ -640,9 +607,9 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f) } } - ret = _x64_peephole_function(tmp_insts, f); + ret = _x64_peephole_function(save_insts, f); error: - scf_vector_free(tmp_insts); - scf_vector_free(std_insts); + scf_vector_free(save_insts); + scf_vector_free(peep_insts); return ret; } diff --git a/native/x64/scf_x64_reg.c b/native/x64/scf_x64_reg.c index efcc9aa..a94ef7c 100644 --- a/native/x64/scf_x64_reg.c +++ b/native/x64/scf_x64_reg.c @@ -95,7 +95,7 @@ void x64_registers_print() } } -int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int stack_size, scf_register_t** 
saved_regs) +int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int stack_size, scf_register_t** saved_regs, scf_register_t** drop_regs, int* n_drops) { scf_basic_block_t* bb = c->basic_block; scf_dag_node_t* dn; @@ -119,6 +119,8 @@ int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int for (j = 0; j < nb_regs; j++) { r2 = x64_find_register(regs[j]); + int drop_flag = 0; + for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) { r = &(x64_registers[i]); @@ -148,11 +150,15 @@ int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int if (k < r->dag_nodes->size) break; + drop_flag = 1; } } - if (i == sizeof(x64_registers) / sizeof(x64_registers[0])) + if (i == sizeof(x64_registers) / sizeof(x64_registers[0])) { + if (drop_flag) + drop_regs[(*n_drops)++] = r2; continue; + } if (X64_COLOR_TYPE(r2->color)) { if (stack_size > 0) @@ -1181,6 +1187,46 @@ void x64_call_rabi(int* p_nints, int* p_nfloats, scf_3ac_code_t* c) *p_nfloats = nfloats; } +int x64_drop_regs(scf_register_t** drop_regs, int n_drops) +{ + scf_register_t* r2; + scf_register_t* r; + scf_dag_node_t* dn; + scf_variable_t* v; + + int N = sizeof(x64_registers) / sizeof(x64_registers[0]); + int i; + int j; + + for (j = 0; j < N; j++) { + r2 = &(x64_registers[j]); + + if (!r2->dag_nodes || r2->dag_nodes->size <= 0) + continue; + + for (i = 0; i < n_drops; i++) { + r = drop_regs[i]; + + if (X64_COLOR_CONFLICT(r2->color, r->color)) + break; + } + + if (i >= n_drops) + continue; + + for (i = 0; i < r2->dag_nodes->size; i++) { + dn = r2->dag_nodes->data[i]; + + dn->color = -1; + dn->loaded = 0; + } + + r2->dag_nodes->size = 0; + } + + return 0; +} + int x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f) { scf_x64_OpCode_t* push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G); @@ -1217,7 +1263,6 @@ int x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f) int x64_pop_callee_regs(scf_3ac_code_t* c, scf_function_t* f) 
{ scf_x64_OpCode_t* pop = x64_find_OpCode(SCF_X64_POP, 8, 8, SCF_X64_G); - scf_basic_block_t* bb = c->basic_block; scf_instruction_t* inst; diff --git a/native/x64/scf_x64_reg.h b/native/x64/scf_x64_reg.h index 0e59579..9be5f65 100644 --- a/native/x64/scf_x64_reg.h +++ b/native/x64/scf_x64_reg.h @@ -112,7 +112,9 @@ int x64_save_var2(scf_dag_node_t* dn, scf_register_t* r, scf_3ac int x64_push_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs); int x64_pop_regs (scf_vector_t* instructions, scf_register_t** regs, int nb_regs, scf_register_t** updated_regs, int nb_updated); -int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int stack_size, scf_register_t** saved_regs); +int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int stack_size, scf_register_t** saved_regs, scf_register_t** drop_regs, int* n_drops); + +int x64_drop_regs(scf_register_t** drop_regs, int n_drops); int x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f); int x64_pop_callee_regs (scf_3ac_code_t* c, scf_function_t* f); diff --git a/parse/Makefile b/parse/Makefile index 594763d..064f593 100644 --- a/parse/Makefile +++ b/parse/Makefile @@ -168,7 +168,7 @@ CFILES += scf_dfa_async.c CFILES += scf_dfa_block.c -CFLAGS += -g -O3 +CFLAGS += -g #-O3 #CFLAGS += -Wall CFLAGS += -I../util CFLAGS += -I../core -- 2.25.1