1, fix: stack memory layout error in X64,
author yu.dongliang <18588496441@163.com>
Sun, 14 Jun 2026 11:09:00 +0000 (19:09 +0800)
committer yu.dongliang <18588496441@163.com>
Sun, 14 Jun 2026 11:09:05 +0000 (19:09 +0800)
2, fix: x64 peephole: when a register is changed, memory data addressed through it becomes invalid.

core/scf_optimizer.c
examples/mat.c
examples/mat2.c [new file with mode: 0644]
native/x64/scf_x64.c
native/x64/scf_x64_inst.c
native/x64/scf_x64_opcode.c
native/x64/scf_x64_peephole.c
native/x64/scf_x64_reg.c
native/x64/scf_x64_reg.h
parse/Makefile

index 80099abbd47adbad279f3791c5a338ea44199900..d69fa201ea6aee25f888dc9c173c7dfafff88373 100644 (file)
@@ -109,6 +109,9 @@ int scf_optimize(scf_ast_t* ast, scf_vector_t* functions)
                if (!f->node.define_flag)
                        continue;
 
+               if (strcmp(f->node.w->text->data, "main"))
+                       continue;
+
                printf("\n");
                scf_logi("------- %s() ------\n", f->node.w->text->data);
 
index 2db49a042eebfce89e360d5d9ec93576e3e0a9b2..0395a17d0da5bbc9b57e4beb62c960b263da23e9 100644 (file)
@@ -66,64 +66,6 @@ struct mat
                return 0;
        }
 
-       mat*, int operator+(mat* this, mat* that)
-       {
-               mat* res;
-
-               res = new mat(MAT_TYPE_DOUBLE, NULL, this->depth, this->width, this->height, this->count);
-               if (!res)
-                       return NULL, -1;
-
-               int64_t c;
-               int64_t y;
-               int64_t x;
-               int64_t z;
-
-               double* d0 = (double*)this->data;
-               double* d1 = (double*)that->data;
-               double* d2 = (double*)res->data;
-
-               for (c = 0; c < this->count; c++) {
-
-                       int64_t c0 = this->c + this->cstep * c;
-                       int64_t c1 = that->c + that->cstep * c;
-
-                       int64_t coffset0 = c0 * this->height * this->xstride;
-                       int64_t coffset1 = c1 * that->height * that->xstride;
-                       int64_t coffset2 = c  * res->height  * res->xstride;
-
-                       for (y = 0; y < this->height; y++) {
-
-                               int64_t y0 = this->y + this->ystep * y;
-                               int64_t y1 = that->y + that->ystep * y;
-
-                               int64_t yoffset0 = y0 * this->xstride * this->depth;
-                               int64_t yoffset1 = y1 * that->xstride * that->depth;
-                               int64_t yoffset2 = y  * res->xstride  * res->depth;
-
-                               for (x = 0; x < this->width; x++) {
-
-                                       int64_t x0 = this->x + this->xstep * x;
-                                       int64_t x1 = that->x + that->xstep * x;
-
-                                       int64_t xoffset0 = x0 * this->depth;
-                                       int64_t xoffset1 = x1 * that->depth;
-                                       int64_t xoffset2 = x  * res->depth;
-
-                                       for (z = 0; z < this->depth; z++) {
-
-                                               int64_t z0 = this->z + this->zstep * z;
-                                               int64_t z1 = that->z + that->zstep * z;
-
-                                               d2[coffset2 + yoffset2 + xoffset2 + z] = d1[coffset1 + yoffset1 + xoffset1 + z1] + d0[coffset0 + yoffset0 + xoffset0 + z0];
-                                       }
-                               }
-                       }
-               }
-
-               return res, 0;
-       }
-
        void __release(mat* this)
        {
                if (this->data)
@@ -135,22 +77,10 @@ struct mat
 int main()
 {
        double a[4] = {1, 2,  3,  4};
-       double b[4] = {5, 6,  7,  8};
-       double c[4] = {9, 10, 11, 12};
-
-       mat* m0;
-       mat* m1;
-       mat* m2;
-       mat* m3;
-       mat* m4;
-
-       m0 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)a, 1, 2, 2, 1);
-       m1 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)b, 1, 2, 2, 1);
-       m2 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)c, 1, 2, 2, 1);
 
-       m3 = m0 + m1 + m2;
+       mat* m0 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)a, 1, 2, 2, 1);
 
-       double* dd = (double*)m3->data;
+       double* dd = (double*)m0->data;
 
        int i;
        for (i = 0; i < 4; i++)
diff --git a/examples/mat2.c b/examples/mat2.c
new file mode 100644 (file)
index 0000000..62922ef
--- /dev/null
@@ -0,0 +1,156 @@
+
+#include"../lib/scf_capi.c"
+
+const int    MAT_TYPE_NONE   = 0; // element-type tag; mat.__init rejects it (returns -1)
+const int    MAT_TYPE_U8     = 1; // element-type tag; mat.__init leaves the byte size unscaled for it
+const int    MAT_TYPE_DOUBLE = 2; // element-type tag; mat.__init scales the byte size by sizeof(double)
+
+struct mat;
+
+struct mat // Matrix object with up to four axes (z, x, y, c); operator+ below reads and writes its buffer as double.
+{
+       uint8_t* data; // element buffer, allocated in __init and freed in __release
+
+       int64_t  z; // start offsets per axis; __init sets all four to 0
+       int64_t  x;
+       int64_t  y;
+       int64_t  c;
+
+       int64_t  depth;  // extent along z
+       int64_t  width;  // extent along x
+       int64_t  height; // extent along y
+       int64_t  count;  // extent along c
+
+       int64_t  xstep; // per-axis index multipliers; __init sets all four to 1
+       int64_t  ystep;
+       int64_t  zstep;
+       int64_t  cstep;
+
+       int64_t  xstride; // row length used in offset math; __init sets it to width
+       int      type;    // one of the MAT_TYPE_* tags
+
+       int __init(mat* this, int type, uint8_t* data, int depth, int width, int height, int count) // Initializes fields, allocates the buffer and optionally copies `data` into it; returns 0 on success, -1 on an unsupported type or failed allocation.
+       {
+               this->z = 0;
+               this->x = 0;
+               this->y = 0;
+               this->c = 0;
+
+               this->depth  = depth;
+               this->width  = width;
+               this->height = height;
+               this->count  = count;
+
+               this->xstep  = 1;
+               this->ystep  = 1;
+               this->zstep  = 1;
+               this->cstep  = 1;
+               this->type   = type;
+
+               this->xstride = this->width;
+               int64_t size  = this->depth * this->width * this->height * this->count; // element count; converted to bytes below
+
+               if (MAT_TYPE_U8 == type) { // size is left as the element count for U8
+
+               } else if (MAT_TYPE_DOUBLE == type) {
+                       size *= sizeof(double);
+               } else
+                       return -1; // any other tag, including MAT_TYPE_NONE, is rejected
+
+               this->data = scf__auto_malloc(size);
+               if (!this->data)
+                       return -1;
+
+               if (data) // a NULL source leaves the new buffer without a copy
+                       memcpy(this->data, data, size);
+               return 0;
+       }
+
+       mat*, int operator+(mat* this, mat* that) // Element-wise sum into a newly created MAT_TYPE_DOUBLE matrix shaped like `this`; returns (res, 0) or (NULL, -1) when creation fails.
+       {
+               mat* res;
+
+               res = new mat(MAT_TYPE_DOUBLE, NULL, this->depth, this->width, this->height, this->count);
+               if (!res)
+                       return NULL, -1;
+
+               int64_t c;
+               int64_t y;
+               int64_t x;
+               int64_t z;
+
+               double* d0 = (double*)this->data; // both operands are read as double; their `type` field is not checked here
+               double* d1 = (double*)that->data;
+               double* d2 = (double*)res->data;
+
+               for (c = 0; c < this->count; c++) { // every loop bound comes from `this`; the extents of `that` are not compared
+
+                       int64_t c0 = this->c + this->cstep * c;
+                       int64_t c1 = that->c + that->cstep * c;
+
+                       int64_t coffset0 = c0 * this->height * this->xstride; // NOTE(review): no depth factor here, while the row offsets below multiply by depth - confirm this is intended when depth > 1
+                       int64_t coffset1 = c1 * that->height * that->xstride;
+                       int64_t coffset2 = c  * res->height  * res->xstride;
+
+                       for (y = 0; y < this->height; y++) {
+
+                               int64_t y0 = this->y + this->ystep * y;
+                               int64_t y1 = that->y + that->ystep * y;
+
+                               int64_t yoffset0 = y0 * this->xstride * this->depth;
+                               int64_t yoffset1 = y1 * that->xstride * that->depth;
+                               int64_t yoffset2 = y  * res->xstride  * res->depth;
+
+                               for (x = 0; x < this->width; x++) {
+
+                                       int64_t x0 = this->x + this->xstep * x;
+                                       int64_t x1 = that->x + that->xstep * x;
+
+                                       int64_t xoffset0 = x0 * this->depth;
+                                       int64_t xoffset1 = x1 * that->depth;
+                                       int64_t xoffset2 = x  * res->depth;
+
+                                       for (z = 0; z < this->depth; z++) {
+
+                                               int64_t z0 = this->z + this->zstep * z;
+                                               int64_t z1 = that->z + that->zstep * z;
+
+                                               d2[coffset2 + yoffset2 + xoffset2 + z] = d1[coffset1 + yoffset1 + xoffset1 + z1] + d0[coffset0 + yoffset0 + xoffset0 + z0]; // the result is indexed with the raw loop counters, the operands with their offset/step-adjusted indices
+                                       }
+                               }
+                       }
+               }
+
+               return res, 0;
+       }
+
+       void __release(mat* this) // Frees the element buffer, if one is attached, through scf__auto_freep.
+       {
+               if (this->data)
+                       scf__auto_freep(&this->data, NULL);
+       }
+};
+
+
+int main() // Example driver: builds two double matrices from b and a, adds them with mat's operator+, and prints the four sums.
+{
+       double a[4] = {1, 2,  3,  4};
+       double b[4] = {5, 6,  7,  8};
+
+       mat* m0;
+       mat* m1;
+       mat* m2;
+
+       m0 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)b, 1, 2, 2, 1); // arguments after the data pointer: depth 1, width 2, height 2, count 1
+       m1 = new mat(MAT_TYPE_DOUBLE, (uint8_t*)a, 1, 2, 2, 1);
+
+       m2 = m0 + m1; // only the mat* result is stored; m0, m1 and m2 are not checked for NULL in this example
+
+       double* dd = (double*)m2->data;
+
+       int i;
+       for (i = 0; i < 4; i++)
+               printf("m2: %lf\n", dd[i]);
+
+       return 0;
+}
index 7ec69c31bb135c8e628d4e96946559daf9352a76..300b9dc0dee77d8f620a48dfdae35d99a05fc380 100644 (file)
@@ -52,7 +52,6 @@ static void _x64_argv_rabi(scf_function_t* f)
                int size     = x64_variable_size(v);
 
                if (is_float) {
-
                        if (f->args_float < X64_ABI_FLOAT_NB) {
 
                                v->rabi       = x64_find_register_type_id_bytes(is_float, x64_abi_float_regs[f->args_float], size);
@@ -61,6 +60,7 @@ static void _x64_argv_rabi(scf_function_t* f)
                                f->args_float++;
                                continue;
                        }
+
                } else if (f->args_int < X64_ABI_NB) {
 
                        v->rabi       = x64_find_register_type_id_bytes(is_float, x64_abi_regs[f->args_int], size);
@@ -79,12 +79,12 @@ static void _x64_argv_rabi(scf_function_t* f)
 static int _x64_function_init(scf_function_t* f, scf_vector_t* local_vars)
 {
        scf_variable_t* v;
+       int i;
 
        int ret = x64_registers_init();
        if (ret < 0)
                return ret;
 
-       int i;
        for (i = 0; i < local_vars->size; i++) {
                v  =        local_vars->data[i];
 
@@ -109,11 +109,13 @@ static int _x64_function_init(scf_function_t* f, scf_vector_t* local_vars)
 
                local_vars_size += size;
 
-               if (local_vars_size & 0x7)
-                       local_vars_size = (local_vars_size + 7) >> 3 << 3;
+               if (v->nb_dimentions > 0)
+                       local_vars_size = (local_vars_size + 0xf) & ~0xf;
+               else
+                       local_vars_size = (local_vars_size + 0x7) & ~0x7;
 
-               v->bp_offset     = -local_vars_size;
-               v->local_flag    = 1;
+               v->bp_offset  = -local_vars_size;
+               v->local_flag = 1;
        }
 
        return local_vars_size;
@@ -136,9 +138,12 @@ static int _x64_save_rabi(scf_function_t* f)
        scf_register_t* xmm1;
        scf_register_t* xmm2;
        scf_register_t* xmm3;
+       scf_register_t* xmm4;
+       scf_register_t* xmm5;
+       scf_register_t* xmm6;
+       scf_register_t* xmm7;
 
        if (f->vargs_flag) {
-
                inst = NULL;
                mov  = x64_find_OpCode(SCF_X64_MOV, 8,8, SCF_X64_G2E);
 
@@ -171,11 +176,19 @@ static int _x64_save_rabi(scf_function_t* f)
                xmm1 = x64_find_register("xmm1");
                xmm2 = x64_find_register("xmm2");
                xmm3 = x64_find_register("xmm3");
-
-               X64_SAVE_RABI(-56, xmm0);
-               X64_SAVE_RABI(-64, xmm1);
-               X64_SAVE_RABI(-72, xmm2);
-               X64_SAVE_RABI(-80, xmm3);
+               xmm4 = x64_find_register("xmm4");
+               xmm5 = x64_find_register("xmm5");
+               xmm6 = x64_find_register("xmm6");
+               xmm7 = x64_find_register("xmm7");
+
+               X64_SAVE_RABI(-56,  xmm0);
+               X64_SAVE_RABI(-64,  xmm1);
+               X64_SAVE_RABI(-72,  xmm2);
+               X64_SAVE_RABI(-80,  xmm3);
+               X64_SAVE_RABI(-88,  xmm4);
+               X64_SAVE_RABI(-96,  xmm5);
+               X64_SAVE_RABI(-104, xmm6);
+               X64_SAVE_RABI(-112, xmm7);
        }
 
        return 0;
@@ -217,6 +230,10 @@ static int _x64_function_finish(scf_function_t* f)
        l   = scf_list_tail(&bb->code_list_head);
        end = scf_list_data(l, scf_3ac_code_t, list);
 
+       int err = x64_pop_callee_regs(end, f);
+       if (err < 0)
+               return err;
+
        if (f->bp_used_flag || f->vla_flag || f->call_flag) {
 
                inst = x64_make_inst_G2E(mov, rsp, rbp);
@@ -230,16 +247,8 @@ static int _x64_function_finish(scf_function_t* f)
                bb ->code_bytes += inst->len;
        }
 
-       int err = x64_pop_callee_regs(end, f);
-       if (err < 0)
-               return err;
-
        f->init_code_bytes = 0;
 
-       err = x64_push_callee_regs(f->init_code, f);
-       if (err < 0)
-               return err;
-
        uint32_t local = f->bp_used_flag ? f->local_vars_size : 0;
 
        if (f->bp_used_flag || f->vla_flag || f->call_flag) {
@@ -252,13 +261,8 @@ static int _x64_function_finish(scf_function_t* f)
                X64_INST_ADD_CHECK(f->init_code->instructions, inst, NULL);
                f->init_code_bytes += inst->len;
 
-               if (f->callee_saved_size & 0xf) {
-                       if (!(local & 0xf))
-                               local += 8;
-               } else {
-                       if ((local & 0xf))
-                               local += 8;
-               }
+               if ((f->callee_saved_size + local) & 0xf)
+                       local += 8;
 
                scf_logd("### local: %#x, local_vars_size: %#x, callee_saved_size: %#x\n",
                                local, f->local_vars_size, f->callee_saved_size);
@@ -267,11 +271,15 @@ static int _x64_function_finish(scf_function_t* f)
                X64_INST_ADD_CHECK(f->init_code->instructions, inst, NULL);
                f->init_code_bytes += inst->len;
 
-               int err = _x64_save_rabi(f);
+               err = _x64_save_rabi(f);
                if (err < 0)
                        return err;
        }
 
+       err = x64_push_callee_regs(f->init_code, f);
+       if (err < 0)
+               return err;
+
        inst = x64_make_inst(ret, 8);
        X64_INST_ADD_CHECK(end->instructions, inst, NULL);
        end->inst_bytes += inst->len;
@@ -1120,11 +1128,13 @@ int scf_x64_select_inst(scf_native_t* ctx, scf_function_t* f)
 
        scf_logi("---------- %s() ------------\n", f->node.w->text->data);
 
+       scf_variable_t* v;
        int i;
+
        for (i = 0; i < local_vars->size; i++) {
-               scf_variable_t* v = local_vars->data[i];
-               assert(v->w);
+               v  =        local_vars->data[i];
 
+               assert(v->w);
                scf_logd("v: %p, name: %s_%d_%d, size: %d, bp_offset: %d, arg_flag: %d\n",
                                v, v->w->text->data, v->w->line, v->w->pos,
                                scf_variable_size(v), v->bp_offset, v->arg_flag);
index 51c6b9b49ddd0c6681883d9276c34ec6145f31e9..992181401b3f18617dc06634a8bcc1b274aad5c4 100644 (file)
@@ -528,8 +528,10 @@ static int _x64_inst_call_handler(scf_native_t* ctx, scf_3ac_code_t* c)
        X64_INST_ADD_CHECK(c->instructions, inst, NULL);
 
        scf_register_t* saved_regs[X64_ABI_CALLER_SAVES_NB];
+       scf_register_t* drop_regs [X64_ABI_CALLER_SAVES_NB];
+       int n_drops = 0;
 
-       int save_size = x64_caller_save_regs(c, x64_abi_caller_saves, X64_ABI_CALLER_SAVES_NB, stack_size, saved_regs);
+       int save_size = x64_caller_save_regs(c, x64_abi_caller_saves, X64_ABI_CALLER_SAVES_NB, stack_size, saved_regs, drop_regs, &n_drops);
        if (save_size < 0) {
                scf_loge("\n");
                return save_size;
@@ -593,6 +595,8 @@ static int _x64_inst_call_handler(scf_native_t* ctx, scf_3ac_code_t* c)
                X64_INST_ADD_CHECK(c->instructions, inst, NULL);
        }
 
+       x64_drop_regs(drop_regs, n_drops);
+
        int nb_updated = 0;
        scf_register_t* updated_regs[X64_ABI_RET_NB * 2];
 
index 95092c4fde3185c99c8905e434c3cd9095b10824..f67b5d218ac466964fb65e2af6b30dcdfc6269d5 100644 (file)
@@ -1,7 +1,6 @@
 #include"scf_x64.h"
 
-scf_x64_OpCode_t       x64_OpCodes[] =
-{
+scf_x64_OpCode_t       x64_OpCodes[] = {
        {SCF_X64_PUSH, "push", 1, {0x50, 0x0, 0x0},1,  8,8, SCF_X64_G,   0,0, 0,{0,0}},
        {SCF_X64_POP,  "pop",  1, {0x58, 0x0, 0x0},1,  8,8, SCF_X64_G,   0,0, 0,{0,0}},
 
index 57f906cf9a10d8217b217dae11d1e4b4fc81970c..147a303d3095392709c09c452179a84aa446f7e9 100644 (file)
@@ -3,18 +3,20 @@
 #include"scf_basic_block.h"
 #include"scf_3ac.h"
 
-static int _x64_peephole_mov(scf_vector_t* std_insts, scf_instruction_t* inst)
+static int _x64_peephole_mov(scf_vector_t* save_insts, scf_vector_t* peep_insts, scf_instruction_t* inst)
 {
-       scf_3ac_code_t*    c  = inst->c;
-       scf_basic_block_t* bb = c->basic_block;
+       scf_3ac_code_t*     c  = inst->c;
+       scf_basic_block_t*  bb = c->basic_block;
 
-       scf_instruction_t* inst2;
-       scf_instruction_t* std;
-       scf_x64_OpCode_t*  OpCode;
+       scf_register_t*     r0;
+       scf_register_t*     r1;
+       scf_x64_OpCode_t*   OpCode;
+       scf_instruction_t*  inst2;
+       scf_instruction_t*  std;
 
        int j;
-       for (j  = std_insts->size - 1; j >= 0; j--) {
-               std = std_insts->data[j];
+       for (j  = peep_insts->size - 1; j >= 0; j--) {
+               std = peep_insts->data[j];
 #if 0
                scf_loge("std j: %d\n", j);
                scf_3ac_code_print(std->c, NULL);
@@ -67,12 +69,13 @@ static int _x64_peephole_mov(scf_vector_t* std_insts, scf_instruction_t* inst)
                                return X64_PEEPHOLE_DEL;
                        }
 
-                       assert(0 == scf_vector_del(std_insts, std));
+                       assert(0 == scf_vector_del(peep_insts, std));
 
                        if (std->nb_used > 0)
                                continue;
 
                        assert(0 == scf_vector_del(std->c->instructions, std));
+                       assert(0 == scf_vector_del(save_insts,           std));
 
                        free(std);
                        std = NULL;
@@ -175,20 +178,16 @@ static int _x64_peephole_mov(scf_vector_t* std_insts, scf_instruction_t* inst)
                        }
                } else if (scf_inst_data_same(&std->src, &inst->dst)) {
 
-                       assert(0 == scf_vector_del(std_insts, std));
+                       assert(0 == scf_vector_del(peep_insts, std));
 
                } else if (x64_inst_data_is_reg(&std->src)) {
 
-                       scf_register_t* r0;
-                       scf_register_t* r1;
-
                        if (x64_inst_data_is_reg(&inst->dst)) {
-
                                r0 = std ->src.base;
                                r1 = inst->dst.base;
 
                                if (X64_COLOR_CONFLICT(r0->color, r1->color))
-                                       assert(0 == scf_vector_del(std_insts, std));
+                                       assert(0 == scf_vector_del(peep_insts, std));
                        }
 
                } else if (x64_inst_data_is_reg(&std->dst)) {
@@ -201,33 +200,41 @@ static int _x64_peephole_mov(scf_vector_t* std_insts, scf_instruction_t* inst)
                }
        }
 
-       assert(0 == scf_vector_add_unique(std_insts, inst));
+       if (x64_inst_data_is_reg(&inst->dst)) {
+               r1 = inst->dst.base;
+
+               for (j  = peep_insts->size - 1; j >= 0; j--) {
+                       std = peep_insts->data[j];
+
+                       if ((std->src.flag && (std->src.base == r1 || std->src.index == r1))
+                        || (std->dst.flag && (std->dst.base == r1 || std->dst.index == r1)))
+                               assert(0 == scf_vector_del(peep_insts, std));
+               }
+       }
+
+       assert(0 == scf_vector_add_unique(peep_insts, inst));
        return 0;
 }
 
-static int _x64_peephole_cmp(scf_vector_t* std_insts, scf_instruction_t* inst)
+static int _x64_peephole_cmp(scf_vector_t* save_insts, scf_vector_t* peep_insts, scf_instruction_t* inst)
 {
-       scf_3ac_code_t*    c  = inst->c;
-       scf_basic_block_t* bb = c->basic_block;
-
-       scf_instruction_t* inst2;
-       scf_instruction_t* std;
+       scf_3ac_code_t*     c  = inst->c;
+       scf_basic_block_t*  bb = c->basic_block;
+       scf_instruction_t*  inst2;
+       scf_instruction_t*  std;
 
        int j;
-       for (j  = std_insts->size - 1; j >= 0; j--) {
-               std = std_insts->data[j];
+       for (j  = peep_insts->size - 1; j >= 0; j--) {
+               std = peep_insts->data[j];
 
                if (SCF_X64_LEA == std->OpCode->type)
                        break;
 
                if (inst->src.flag) {
-
                        if (scf_inst_data_same(&inst->src, &std->src))
-
                                inst->src.base = std->dst.base;
 
                        else if (scf_inst_data_same(&inst->src, &std->dst))
-
                                inst->src.base = std->src.base;
                        else
                                goto check;
@@ -244,11 +251,9 @@ static int _x64_peephole_cmp(scf_vector_t* std_insts, scf_instruction_t* inst)
                } else if (inst->dst.flag) {
 
                        if (scf_inst_data_same(&inst->dst, &std->src))
-
                                inst->dst.base  = std->dst.base;
 
                        else if (scf_inst_data_same(&inst->dst, &std->dst))
-
                                inst->dst.base  = std->src.base;
                        else
                                goto check;
@@ -295,42 +300,6 @@ check:
        return 0;
 }
 
-static int _x64_peephole_movx(scf_vector_t* std_insts, scf_instruction_t* inst)
-{
-       if (!x64_inst_data_is_reg(&inst->src) || !x64_inst_data_is_reg(&inst->dst)) {
-               scf_vector_clear(std_insts, NULL);
-               return 0;
-       }
-
-       scf_3ac_code_t*    c  = inst->c;
-       scf_basic_block_t* bb = c->basic_block;
-       scf_instruction_t* std;
-       scf_x64_OpCode_t*  OpCode;
-       int j;
-
-       for (j  = std_insts->size - 1; j >= 0; j--) {
-               std = std_insts->data[j];
-
-               if (scf_inst_data_same(&std->dst, &inst->src)) {
-                       std->nb_used++;
-
-                       if (std->OpCode == inst->OpCode
-                                       && scf_inst_data_same(&std->src, &inst->src)
-                                       && scf_inst_data_same(&std->dst, &inst->dst)) {
-
-                               assert(0 == scf_vector_del(inst->c->instructions, inst));
-
-                               free(inst);
-                               inst = NULL;
-                               return X64_PEEPHOLE_DEL;
-                       }
-               }
-       }
-
-       assert(0 == scf_vector_add_unique(std_insts, inst));
-       return 0;
-}
-
 static int x64_inst_is_useful(scf_instruction_t* inst, scf_instruction_t* std)
 {
        if (scf_inst_data_same(&inst->dst, &std->src))
@@ -478,15 +447,15 @@ static int __x64_inst_useful_bb_next(scf_basic_block_t* bb, void* data, scf_vect
        return 0;
 }
 
-static int _x64_peephole_function(scf_vector_t* tmp_insts, scf_function_t* f)
+static int _x64_peephole_function(scf_vector_t* save_insts, scf_function_t* f)
 {
        scf_instruction_t*  inst;
        scf_basic_block_t*  bb;
        scf_3ac_code_t*     c;
        int i;
 
-       for (i   = tmp_insts->size - 1; i >= 0; i--) {
-               inst = tmp_insts->data[i];
+       for (i   = save_insts->size - 1; i >= 0; i--) {
+               inst = save_insts->data[i];
 
                if (SCF_X64_MOV != inst->OpCode->type)
                        continue;
@@ -510,7 +479,7 @@ static int _x64_peephole_function(scf_vector_t* tmp_insts, scf_function_t* f)
                        continue;
 
                assert(0 == scf_vector_del(c->instructions,  inst));
-               assert(0 == scf_vector_del(tmp_insts,        inst));
+               assert(0 == scf_vector_del(save_insts,        inst));
 
                free(inst);
                inst = NULL;
@@ -518,8 +487,8 @@ static int _x64_peephole_function(scf_vector_t* tmp_insts, scf_function_t* f)
 
        int n_locals = 0;
 
-       for (i = 0; i < tmp_insts->size; i++) {
-               inst      = tmp_insts->data[i];
+       for (i = 0; i < save_insts->size; i++) {
+               inst      = save_insts->data[i];
 
                if (x64_inst_data_is_local(&inst->src) || x64_inst_data_is_local(&inst->dst))
                        n_locals++;
@@ -536,7 +505,6 @@ static int _x64_peephole_function(scf_vector_t* tmp_insts, scf_function_t* f)
 
 int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f)
 {
-       scf_instruction_t*  std;
        scf_instruction_t*  inst;
        scf_basic_block_t*  bb;
        scf_3ac_operand_t*  dst;
@@ -544,16 +512,16 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f)
        scf_list_t*         l;
        scf_list_t*         l2;
 
-       scf_vector_t*       std_insts;
-       scf_vector_t*       tmp_insts; // instructions for register or local variable
+       scf_vector_t*       peep_insts;
+       scf_vector_t*       save_insts;
 
-       std_insts = scf_vector_alloc();
-       if (!std_insts)
+       peep_insts = scf_vector_alloc();
+       if (!peep_insts)
                return -ENOMEM;
 
-       tmp_insts = scf_vector_alloc();
-       if (!tmp_insts) {
-               scf_vector_free(tmp_insts);
+       save_insts = scf_vector_alloc();
+       if (!save_insts) {
+               scf_vector_free(save_insts);
                return -ENOMEM;
        }
 
@@ -569,7 +537,7 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f)
                bb = scf_list_data(l, scf_basic_block_t, list);
 
                if (bb->jmp_flag) {
-                       scf_vector_clear(std_insts, NULL);
+                       scf_vector_clear(peep_insts, NULL);
 
                        l2 = scf_list_head(&bb->code_list_head);
                        c  = scf_list_data(l2, scf_3ac_code_t, list);
@@ -582,9 +550,8 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f)
                        continue;
                }
 
-               if (bb->jmp_dst_flag) {
-                       scf_vector_clear(std_insts, NULL);
-               }
+               if (bb->jmp_dst_flag)
+                       scf_vector_clear(peep_insts, NULL);
 
                for (l2 = scf_list_head(&bb->code_list_head); l2 != scf_list_sentinel(&bb->code_list_head);
                        l2  = scf_list_next(l2)) {
@@ -603,26 +570,26 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f)
 //                             scf_instruction_print(inst);
 
                                ret = 0;
-                               switch (inst->OpCode->type) {
-
+                               switch (inst->OpCode->type)
+                               {
                                        case SCF_X64_CMP:
                                        case SCF_X64_TEST:
-                                               ret = _x64_peephole_cmp(std_insts, inst);
+                                               ret = _x64_peephole_cmp(save_insts, peep_insts, inst);
                                                break;
 
                                        case SCF_X64_MOV:
-                                               ret = _x64_peephole_mov(std_insts, inst);
+                                               ret = _x64_peephole_mov(save_insts, peep_insts, inst);
                                                break;
 
                                        case SCF_X64_LEA:
-                                               ret = scf_vector_add_unique(std_insts, inst);
+                                               ret = scf_vector_add_unique(peep_insts, inst);
                                                break;
 
                                        case SCF_X64_MOVSS:
                                        case SCF_X64_MOVSD:
                                                break;
                                        default:
-                                               scf_vector_clear(std_insts, NULL);
+                                               scf_vector_clear(peep_insts, NULL);
                                                break;
                                };
 
@@ -632,7 +599,7 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f)
                                if (X64_PEEPHOLE_DEL == ret)
                                        continue;
 
-                               ret = scf_vector_add(tmp_insts, inst);
+                               ret = scf_vector_add(save_insts, inst);
                                if (ret < 0)
                                        goto error;
                                i++;
@@ -640,9 +607,9 @@ int x64_optimize_peephole(scf_native_t* ctx, scf_function_t* f)
                }
        }
 
-       ret = _x64_peephole_function(tmp_insts, f);
+       ret = _x64_peephole_function(save_insts, f);
 error:
-       scf_vector_free(tmp_insts);
-       scf_vector_free(std_insts);
+       scf_vector_free(save_insts);
+       scf_vector_free(peep_insts);
        return ret;
 }
index efcc9aa06277a27d56075ea79863fedebe8c6a87..a94ef7c49ffa5991e0d22fa9358d6ed6d937519f 100644 (file)
@@ -95,7 +95,7 @@ void x64_registers_print()
        }
 }
 
-int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int stack_size, scf_register_t** saved_regs)
+int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int stack_size, scf_register_t** saved_regs, scf_register_t** drop_regs, int* n_drops)
 {
        scf_basic_block_t*  bb = c->basic_block;
        scf_dag_node_t*     dn;
@@ -119,6 +119,8 @@ int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int
        for (j = 0; j < nb_regs; j++) {
                r2 = x64_find_register(regs[j]);
 
+               int drop_flag = 0;
+
                for (i = 0; i < sizeof(x64_registers) / sizeof(x64_registers[0]); i++) {
                        r  = &(x64_registers[i]);
 
@@ -148,11 +150,15 @@ int x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int
 
                                if (k < r->dag_nodes->size)
                                        break;
+                               drop_flag = 1;
                        }
                }
 
-               if (i == sizeof(x64_registers) / sizeof(x64_registers[0]))
+               if (i == sizeof(x64_registers) / sizeof(x64_registers[0])) {
+                       if (drop_flag)
+                               drop_regs[(*n_drops)++] = r2;
                        continue;
+               }
 
                if (X64_COLOR_TYPE(r2->color)) {
                        if (stack_size > 0)
@@ -1181,6 +1187,46 @@ void x64_call_rabi(int* p_nints, int* p_nfloats, scf_3ac_code_t* c)
                *p_nfloats = nfloats;
 }
 
+int x64_drop_regs(scf_register_t** drop_regs, int n_drops) /* Unbinds all DAG nodes from every entry of x64_registers whose color conflicts with one of the n_drops registers in drop_regs; always returns 0. */
+{
+       scf_register_t*  r2;
+       scf_register_t*  r;
+       scf_dag_node_t*  dn;
+       scf_variable_t*  v; /* declared but not used in this function */
+
+       int N = sizeof(x64_registers) / sizeof(x64_registers[0]); /* number of entries in the register table */
+       int i;
+       int j;
+
+       for (j = 0; j < N; j++) {
+               r2 = &(x64_registers[j]);
+
+               if (!r2->dag_nodes || r2->dag_nodes->size <= 0) /* nothing is bound to this register */
+                       continue;
+
+               for (i = 0; i < n_drops; i++) { /* look for the first dropped register that conflicts with r2 */
+                       r  = drop_regs[i];
+
+                       if (X64_COLOR_CONFLICT(r2->color, r->color))
+                               break;
+               }
+
+               if (i >= n_drops) /* no conflict found: leave r2's bindings alone */
+                       continue;
+
+               for (i = 0; i < r2->dag_nodes->size; i++) {
+                       dn =        r2->dag_nodes->data[i];
+
+                       dn->color  = -1; /* node no longer has a register color */
+                       dn->loaded = 0;  /* node is marked as not loaded */
+               }
+
+               r2->dag_nodes->size = 0; /* empties the list by resetting its size; the storage itself is not freed here */
+       }
+
+       return 0;
+}
+
 int x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f)
 {
        scf_x64_OpCode_t*  push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G);
@@ -1217,7 +1263,6 @@ int x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f)
 int x64_pop_callee_regs(scf_3ac_code_t* c, scf_function_t* f)
 {
        scf_x64_OpCode_t*  pop  = x64_find_OpCode(SCF_X64_POP, 8, 8, SCF_X64_G);
-
        scf_basic_block_t* bb   = c->basic_block;
 
        scf_instruction_t* inst;
index 0e595799571f20c156eb1269a936748fc0be807b..9be5f653b37278a98477915c77601ccc5344809f 100644 (file)
@@ -112,7 +112,9 @@ int                 x64_save_var2(scf_dag_node_t* dn, scf_register_t* r, scf_3ac
 int                 x64_push_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs);
 int                 x64_pop_regs (scf_vector_t* instructions, scf_register_t** regs, int nb_regs, scf_register_t** updated_regs, int nb_updated);
 
-int                 x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int stack_size, scf_register_t** saved_regs);
+int                 x64_caller_save_regs(scf_3ac_code_t* c, const char* regs[], int nb_regs, int stack_size, scf_register_t** saved_regs, scf_register_t** drop_regs, int* n_drops);
+
+int                 x64_drop_regs(scf_register_t** drop_regs, int n_drops);
 
 int                 x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f);
 int                 x64_pop_callee_regs (scf_3ac_code_t* c, scf_function_t* f);
index 594763de6010b3b6290bf98f6b7639749d2b5b87..064f5931903b08eee893c9157c1d9ca09fd6d958 100644 (file)
@@ -168,7 +168,7 @@ CFILES += scf_dfa_async.c
 
 CFILES += scf_dfa_block.c
 
-CFLAGS += -g -O3
+CFLAGS += -g #-O3
 #CFLAGS += -Wall 
 CFLAGS += -I../util
 CFLAGS += -I../core