From e6c75944ba21b370c69b6d1215e20cb82f2e820a Mon Sep 17 00:00:00 2001
From: "yu.dongliang" <18588496441@163.com>
Date: Mon, 14 Oct 2024 17:43:41 +0800
Subject: [PATCH] optimize the position of argv[] sign/zero extension in
 function call()

---
 core/scf_basic_block.c    |  9 ++----
 core/scf_dag.h            |  6 ++--
 native/scf_native.c       | 18 ++++++++----
 native/x64/scf_x64.c      |  4 +--
 native/x64/scf_x64_inst.c | 42 +++++++++++++++++++++-------
 native/x64/scf_x64_reg.c  | 59 +++++++++++++++++++++++++++------------
 native/x64/scf_x64_reg.h  | 10 +++++--
 7 files changed, 102 insertions(+), 46 deletions(-)

diff --git a/core/scf_basic_block.c b/core/scf_basic_block.c
index 9939875..bdf13ce 100644
--- a/core/scf_basic_block.c
+++ b/core/scf_basic_block.c
@@ -379,7 +379,7 @@ void scf_basic_block_print_list(scf_list_t* h)
 
 				v  = dn->var;
 				if (v && v->w)
-					printf("entry active: v_%d_%d/%s\n", v->w->line, v->w->pos, v->w->text->data);
+					printf("entry active: v_%d_%d/%s_%#lx\n", v->w->line, v->w->pos, v->w->text->data, 0xffff & (uintptr_t)dn);
 			}
 		}
 
@@ -389,7 +389,7 @@ void scf_basic_block_print_list(scf_list_t* h)
 
 				v  = dn->var;
 				if (v && v->w)
-					printf("exit  active: v_%d_%d/%s, dn: %#lx\n", v->w->line, v->w->pos, v->w->text->data, 0xffff & (uintptr_t)dn);
+					printf("exit  active: v_%d_%d/%s_%#lx\n", v->w->line, v->w->pos, v->w->text->data, 0xffff & (uintptr_t)dn);
 			}
 		}
 
@@ -418,7 +418,7 @@ void scf_basic_block_print_list(scf_list_t* h)
 
 				v  = dn->var;
 				if (v && v->w)
-					printf("updated:      v_%d_%d/%s\n", v->w->line, v->w->pos, v->w->text->data);
+					printf("updated:      v_%d_%d/%s_%#lx\n", v->w->line, v->w->pos, v->w->text->data, 0xffff & (uintptr_t)dn);
 			}
 		}
 
@@ -518,9 +518,6 @@ static int _copy_vars_by_active(scf_vector_t* dn_vec, scf_vector_t* ds_vars, int
 
 		dn = ds->dag_node;
 
-		if (scf_variable_const(dn->var))
-			continue;
-
 		if (active == ds->active && scf_dn_through_bb(dn)) {
 
 			int ret = scf_vector_add_unique(dn_vec, dn);
diff --git a/core/scf_dag.h b/core/scf_dag.h
index b58923f..b02bd11 100644
--- a/core/scf_dag.h
+++ b/core/scf_dag.h
@@ -140,8 +140,10 @@ void              scf_ds_vector_clear_by_dn(scf_vector_t* vec, scf_dag_node_t*
 
 static int scf_dn_through_bb(scf_dag_node_t* dn)
 {
-	return (dn->var->global_flag || dn->var->local_flag || dn->var->tmp_flag)
-		&& !scf_variable_const(dn->var);
+	scf_variable_t* v = dn->var;
+
+	return (v->global_flag || v->local_flag || v->tmp_flag)
+		&& !(v->const_flag && 0 == v->nb_pointers + v->nb_dimentions);
 }
 
 static int scf_dn_status_cmp(const void* p0, const void* p1)
diff --git a/native/scf_native.c b/native/scf_native.c
index 14b3e81..6366eb7 100644
--- a/native/scf_native.c
+++ b/native/scf_native.c
@@ -14,9 +14,12 @@ void scf_instruction_print(scf_instruction_t* inst)
 			printf("%d(%s, %s, %d), ", inst->src.disp, inst->src.base->name,
 					inst->src.index->name, inst->src.scale);
 
-		else if (inst->src.base)
-			printf("%d(%s), ", inst->src.disp, inst->src.base->name);
-		else
+		else if (inst->src.base) {
+			if (inst->src.disp < 0)
+				printf("-%#x(%s), ", -inst->src.disp, inst->src.base->name);
+			else
+				printf("%#x(%s), ", inst->src.disp, inst->src.base->name);
+		} else
 			printf("%d(rip), ", inst->dst.disp);
 
 	} else if (inst->src.base)
@@ -30,9 +33,12 @@ void scf_instruction_print(scf_instruction_t* inst)
 			printf("%d(%s, %s, %d), ", inst->dst.disp, inst->dst.base->name,
 					inst->dst.index->name, inst->dst.scale);
 
-		else if (inst->dst.base)
-			printf("%d(%s), ", inst->dst.disp, inst->dst.base->name);
-		else
+		else if (inst->dst.base) {
+			if (inst->dst.disp < 0)
+				printf("-%#x(%s), ", -inst->dst.disp, inst->dst.base->name);
+			else
+				printf("%#x(%s), ", inst->dst.disp, inst->dst.base->name);
+		} else
 			printf("%d(rip), ", inst->dst.disp);
 
 	} else if (inst->dst.base)
diff --git a/native/x64/scf_x64.c b/native/x64/scf_x64.c
index ed48bea..5ebd062 100644
--- a/native/x64/scf_x64.c
+++ b/native/x64/scf_x64.c
@@ -53,7 +53,7 @@ static void _x64_argv_rabi(scf_function_t* f)
 
 		if (is_float) {
 
-			if (f->args_float < X64_ABI_NB) {
+			if (f->args_float < X64_ABI_FLOAT_NB) {
 
 				v->rabi       = x64_find_register_type_id_bytes(is_float, x64_abi_float_regs[f->args_float], size);
 				v->bp_offset  = bp_floats;
@@ -93,7 +93,7 @@ static int _x64_function_init(scf_function_t* f, scf_vector_t* local_vars)
 
 	_x64_argv_rabi(f);
 
-	int local_vars_size = 8 + X64_ABI_NB * 8 * 2;
+	int local_vars_size = 8 + (X64_ABI_NB + X64_ABI_FLOAT_NB) * 8;
 
 	for (i = 0; i < local_vars->size; i++) {
 		v  =        local_vars->data[i];
diff --git a/native/x64/scf_x64_inst.c b/native/x64/scf_x64_inst.c
index 7f6a32e..392cf08 100644
--- a/native/x64/scf_x64_inst.c
+++ b/native/x64/scf_x64_inst.c
@@ -112,10 +112,13 @@ static int _x64_inst_call_argv(scf_3ac_code_t* c, scf_function_t* f)
 	scf_x64_OpCode_t*   mov;
 	scf_x64_OpCode_t*   movx;
 	scf_instruction_t*  inst;
+	scf_instruction_t*  inst_movx[X64_ABI_NB + X64_ABI_FLOAT_NB] = {NULL};
 
+	int nb_movx   = 0;
 	int nb_floats = 0;
 	int ret;
 	int i;
+
 	for (i = c->srcs->size - 1; i >= 1; i--) {
 		scf_3ac_operand_t*  src   = c->srcs->data[i];
 		scf_variable_t*     v     = src->dag_node->var;
@@ -199,12 +202,12 @@ static int _x64_inst_call_argv(scf_3ac_code_t* c, scf_function_t* f)
 			rs = x64_find_register_color_bytes(rs->color, 8);
 		}
 
-		if (movx) {
-			inst = x64_make_inst_E2G(movx, rs,  rs);
-			X64_INST_ADD_CHECK(c->instructions, inst);
-		}
-
 		if (!rd) {
+			if (movx) {
+				inst = x64_make_inst_E2G(movx, rs,  rs);
+				X64_INST_ADD_CHECK(c->instructions, inst);
+			}
+
 			inst = x64_make_inst_G2P(mov, rsp, v->sp_offset, rs);
 			X64_INST_ADD_CHECK(c->instructions, inst);
 			continue;
@@ -217,12 +220,30 @@ static int _x64_inst_call_argv(scf_3ac_code_t* c, scf_function_t* f)
 		}
 
 		if (!X64_COLOR_CONFLICT(rd->color, rs->color)) {
+			if (movx) {
+				inst = x64_make_inst_E2G(movx, rs,  rs);
+				X64_INST_ADD_CHECK(c->instructions, inst);
+			}
+
 			rd   = x64_find_register_color_bytes(rd->color, rs->bytes);
 			inst = x64_make_inst_G2E(mov, rd, rs);
 			X64_INST_ADD_CHECK(c->instructions, inst);
+
+		} else if (movx) {
+			inst = x64_make_inst_E2G(movx, rs,  rs);
+			if (!inst) {
+				scf_loge("\n");
+				return -ENOMEM;
+			}
+
+			inst_movx[nb_movx++] = inst;
 		}
 	}
 
+	for (i = 0; i < nb_movx; i++) {
+		X64_INST_ADD_CHECK(c->instructions, inst_movx[i]);
+	}
+
 	return nb_floats;
 }
 
@@ -459,6 +480,7 @@ static int _x64_inst_call_handler(scf_native_t* ctx, scf_3ac_code_t* c)
 
 	scf_register_t*     rsp  = x64_find_register("rsp");
 	scf_register_t*     rax  = x64_find_register("rax");
+	scf_register_t*     eax  = x64_find_register("eax");
 //	scf_x64_OpCode_t*   xor;
 	scf_x64_OpCode_t*   mov;
 	scf_x64_OpCode_t*   sub;
@@ -502,15 +524,15 @@ static int _x64_inst_call_handler(scf_native_t* ctx, scf_3ac_code_t* c)
 		scf_loge("\n");
 		return ret;
 	}
-	uint64_t imm = ret > 0;
+	uint32_t imm = ret > 0;
 
-	mov  = x64_find_OpCode(SCF_X64_MOV, 8,8, SCF_X64_I2G);
-	inst = x64_make_inst_I2G(mov, rax, (uint8_t*)&imm, sizeof(imm));
+	mov  = x64_find_OpCode(SCF_X64_MOV, 4,4, SCF_X64_I2G);
+	inst = x64_make_inst_I2G(mov, eax, (uint8_t*)&imm, sizeof(imm));
 	X64_INST_ADD_CHECK(c->instructions, inst);
 
 	scf_register_t* saved_regs[X64_ABI_CALLER_SAVES_NB];
 
-	int save_size = x64_caller_save_regs(c->instructions, x64_abi_caller_saves, X64_ABI_CALLER_SAVES_NB, stack_size, saved_regs);
+	int save_size = x64_caller_save_regs(c, x64_abi_caller_saves, X64_ABI_CALLER_SAVES_NB, stack_size, saved_regs);
 	if (save_size < 0) {
 		scf_loge("\n");
 		return save_size;
@@ -2117,7 +2139,7 @@ static int _x64_inst_va_arg_handler(scf_native_t* ctx, scf_3ac_code_t* c)
 	int size     = x64_variable_size(v);
 
 	uint32_t nints   = X64_ABI_NB;
-	uint32_t nfloats = X64_ABI_NB;
+	uint32_t nfloats = X64_ABI_FLOAT_NB;
 	uint32_t offset  = 0;
 	uint32_t incptr  = 8;
 
diff --git a/native/x64/scf_x64_reg.c b/native/x64/scf_x64_reg.c
index 0bec392..80548d1 100644
--- a/native/x64/scf_x64_reg.c
+++ b/native/x64/scf_x64_reg.c
@@ -207,19 +207,23 @@ void x64_registers_print()
 	}
 }
 
-int x64_caller_save_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs, int stack_size, scf_register_t** saved_regs)
+int x64_caller_save_regs(scf_3ac_code_t* c, uint32_t* regs, int nb_regs, int stack_size, scf_register_t** saved_regs)
 {
-	int i;
-	int j;
+	scf_basic_block_t*  bb = c->basic_block;
+	scf_dag_node_t*     dn;
+
+	scf_instruction_t*  inst;
+	scf_x64_OpCode_t*   push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G);
+	scf_x64_OpCode_t*   mov  = x64_find_OpCode(SCF_X64_MOV,  8,8, SCF_X64_G2E);
+	scf_register_t*     rsp  = x64_find_register("rsp");
 	scf_register_t*     r;
 	scf_register_t*     r2;
-	scf_register_t*     rsp  = x64_find_register("rsp");
-	scf_x64_OpCode_t*   mov  = x64_find_OpCode(SCF_X64_MOV,  8,8, SCF_X64_G2E);
-	scf_x64_OpCode_t*   push = x64_find_OpCode(SCF_X64_PUSH, 8,8, SCF_X64_G);
-	scf_instruction_t*  inst;
 
+	int i;
+	int j;
+	int k;
 	int size = 0;
-	int k    = 0;
+	int n    = 0;
 
 	for (j = 0; j < nb_regs; j++) {
 		r2 = x64_find_register_type_id_bytes(0, regs[j], 8);
@@ -233,8 +237,27 @@ int x64_caller_save_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs
 			if (0 == r->dag_nodes->size)
 				continue;
 
-			if (X64_COLOR_CONFLICT(r2->color, r->color))
-				break;
+			if (X64_COLOR_CONFLICT(r2->color, r->color)) {
+
+				for (k = 0; k < r->dag_nodes->size; k++) {
+					dn =        r->dag_nodes->data[k];
+
+					if (scf_vector_find(bb->exit_dn_actives, dn)
+							|| scf_vector_find(bb->dn_saves, dn)
+							|| scf_vector_find(bb->dn_resaves, dn)) {
+
+						scf_variable_t* v = dn->var;
+						if (v && v->w)
+							scf_logw("dn: %#lx, v_%d/%s/%#lx\n", 0xffff & (uintptr_t)dn, v->w->line, v->w->text->data, 0xffff & (uintptr_t)v);
+						else
+							scf_logw("dn: %#lx, v_%#lx\n", 0xffff & (uintptr_t)dn, 0xffff & (uintptr_t)v);
+						break;
+					}
+				}
+
+				if (k < r->dag_nodes->size)
+					break;
+			}
 		}
 
 		if (i == sizeof(x64_registers) / sizeof(x64_registers[0]))
@@ -244,29 +267,29 @@ int x64_caller_save_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs
 			inst = x64_make_inst_G2P(mov, rsp, size + stack_size, r2);
 		else
 			inst = x64_make_inst_G(push, r2);
-		X64_INST_ADD_CHECK(instructions, inst);
+		X64_INST_ADD_CHECK(c->instructions, inst);
 
-		saved_regs[k++] = r2;
+		saved_regs[n++] = r2;
 		size += 8;
 	}
 
 	if (size & 0xf) {
-		r2 = saved_regs[k - 1];
+		r2 = saved_regs[n - 1];
 
 		if (stack_size > 0)
 			inst = x64_make_inst_G2P(mov, rsp, size + stack_size, r2);
 		else
 			inst = x64_make_inst_G(push, r2);
-		X64_INST_ADD_CHECK(instructions, inst);
+		X64_INST_ADD_CHECK(c->instructions, inst);
 
-		saved_regs[k++] = r2;
+		saved_regs[n++] = r2;
 		size += 8;
 	}
 
 	if (stack_size > 0) {
-		for (j = 0; j < k / 2; j++) {
+		for (j = 0; j < n / 2; j++) {
 
-			i  = k - 1 - j;
+			i  = n - 1 - j;
 			SCF_XCHG(saved_regs[i], saved_regs[j]);
 		}
 	}
@@ -1296,7 +1319,7 @@ void x64_call_rabi(int* p_nints, int* p_nfloats, scf_3ac_code_t* c)
 		int size     = x64_variable_size (dn->var);
 
 		if (is_float) {
-			if (nfloats < X64_ABI_NB)
+			if (nfloats < X64_ABI_FLOAT_NB)
 				dn->rabi2 = x64_find_register_type_id_bytes(is_float, x64_abi_float_regs[nfloats++], size);
 			else
 				dn->rabi2 = NULL;
diff --git a/native/x64/scf_x64_reg.h b/native/x64/scf_x64_reg.h
index 832c62e..6c90f72 100644
--- a/native/x64/scf_x64_reg.h
+++ b/native/x64/scf_x64_reg.h
@@ -41,6 +41,7 @@ static uint32_t x64_abi_regs[] =
 	SCF_X64_REG_R8,
 	SCF_X64_REG_R9,
 };
+#define X64_ABI_NB (sizeof(x64_abi_regs) / sizeof(x64_abi_regs[0]))
 
 static uint32_t x64_abi_float_regs[] =
 {
@@ -48,8 +49,12 @@ static uint32_t x64_abi_float_regs[] =
 	SCF_X64_REG_XMM1,
 	SCF_X64_REG_XMM2,
 	SCF_X64_REG_XMM3,
+	SCF_X64_REG_XMM4,
+	SCF_X64_REG_XMM5,
+	SCF_X64_REG_XMM6,
+	SCF_X64_REG_XMM7,
 };
-#define X64_ABI_NB (sizeof(x64_abi_regs) / sizeof(x64_abi_regs[0]))
+#define X64_ABI_FLOAT_NB (sizeof(x64_abi_float_regs) / sizeof(x64_abi_float_regs[0]))
 
 static uint32_t x64_abi_ret_regs[] =
 {
@@ -132,7 +137,8 @@ int                 x64_save_var2(scf_dag_node_t* dn, scf_register_t* r, scf_3ac
 
 int                 x64_push_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs);
 int                 x64_pop_regs (scf_vector_t* instructions, scf_register_t** regs, int nb_regs, scf_register_t** updated_regs, int nb_updated);
-int                 x64_caller_save_regs(scf_vector_t* instructions, uint32_t* regs, int nb_regs, int stack_size, scf_register_t** saved_regs);
+
+int                 x64_caller_save_regs(scf_3ac_code_t* c, uint32_t* regs, int nb_regs, int stack_size, scf_register_t** saved_regs);
 
 int                 x64_push_callee_regs(scf_3ac_code_t* c, scf_function_t* f);
 int                 x64_pop_callee_regs (scf_3ac_code_t* c, scf_function_t* f);
-- 
2.25.1