From ba0e4b27f0f415e6411c3f70ef1ea44e708432d0 Mon Sep 17 00:00:00 2001 From: "yu.dongliang" <18588496441@163.com> Date: Sat, 5 Oct 2024 19:28:59 +0800 Subject: [PATCH] fix: x64 SIB encode error for register r12, r13. fix: DAG of call() error when function has no arg. fix: change pointer add / sub to address of array. fix: optimize 'const teq' --- core/scf_3ac.c | 1 - core/scf_core_types.h | 1 + core/scf_dag.c | 43 +++++++------ core/scf_operator_dag.c | 4 +- core/scf_optimizer_auto_gc_find.c | 5 +- core/scf_optimizer_basic_block.c | 2 +- core/scf_optimizer_const_teq.c | 38 ++++++----- core/scf_pointer_alias.c | 8 ++- native/x64/scf_x64_inst_util.c | 12 +++- parse/scf_operator_handler_semantic.c | 91 ++++++++++++++++++++++++--- 10 files changed, 150 insertions(+), 55 deletions(-) diff --git a/core/scf_3ac.c b/core/scf_3ac.c index ea64fec..5f0374e 100644 --- a/core/scf_3ac.c +++ b/core/scf_3ac.c @@ -625,7 +625,6 @@ static int _3ac_code_to_dag(scf_3ac_code_t* c, scf_list_t* dag, int nb_operands0 } } - printf("\n"); return 0; } diff --git a/core/scf_core_types.h b/core/scf_core_types.h index f742960..9208e77 100644 --- a/core/scf_core_types.h +++ b/core/scf_core_types.h @@ -160,6 +160,7 @@ enum scf_core_types { SCF_OP_3AC_DEC_POST_ARRAY_INDEX, SCF_OP_3AC_ADDRESS_OF_ARRAY_INDEX, + // 97 SCF_OP_3AC_ASSIGN_POINTER, // left value, p->a = expr SCF_OP_3AC_ADD_ASSIGN_POINTER, SCF_OP_3AC_SUB_ASSIGN_POINTER, diff --git a/core/scf_dag.c b/core/scf_dag.c index 53182ad..209d8ec 100644 --- a/core/scf_dag.c +++ b/core/scf_dag.c @@ -498,6 +498,24 @@ void scf_dag_node_free_list(scf_list_t* dag_list_head) } } +static int __dn_same_call(scf_dag_node_t* dn, const scf_node_t* node, const scf_node_t* split) +{ + scf_variable_t* v0 = _scf_operand_get(node); + scf_variable_t* v1 = dn->var; + + if (split) + v0 = _scf_operand_get(split); + + if (v0 && v0->w && v1 && v1->w) { + if (v0->type != v1->type) { + scf_logd("v0: %d/%s_%#lx, split_flag: %d\n", v0->w->line, v0->w->text->data, 0xffff & (uintptr_t)v0, node->split_flag); + scf_logd("v1: %d/%s_%#lx\n", v1->w->line, v1->w->text->data, 0xffff & (uintptr_t)v1); + } + } + + return v0 == v1; +} + int scf_dag_node_same(scf_dag_node_t* dn, const scf_node_t* node) { int i; @@ -505,7 +523,6 @@ int scf_dag_node_same(scf_dag_node_t* dn, const scf_node_t* node) const scf_node_t* split = NULL; if (node->split_flag) { - if (dn->var != _scf_operand_get(node)) return 0; @@ -542,8 +559,11 @@ int scf_dag_node_same(scf_dag_node_t* dn, const scf_node_t* node) return 0; } - if (!dn->childs) + if (!dn->childs) { + if (SCF_OP_CALL == node->type && 1 == node->nb_nodes) + return __dn_same_call(dn, node, split); return 0; + } if (SCF_OP_TYPE_CAST == node->type) { scf_dag_node_t* dn0 = dn->childs->data[0]; @@ -641,24 +661,9 @@ cmp_childs: } } - if (SCF_OP_CALL == dn->type) { - - scf_variable_t* v0 = _scf_operand_get(node); - scf_variable_t* v1 = dn->var; - - if (split) - v0 = _scf_operand_get(split); - - if (v0 && v0->w && v1 && v1->w) { - if (v0->type != v1->type) { - scf_logd("v0: %d/%s_%#lx, split_flag: %d\n", v0->w->line, v0->w->text->data, 0xffff & (uintptr_t)v0, node->split_flag); - scf_logd("v1: %d/%s_%#lx\n", v1->w->line, v1->w->text->data, 0xffff & (uintptr_t)v1); - } - } + if (SCF_OP_CALL == dn->type) + return __dn_same_call(dn, node, split); - if (v0 != v1) - return 0; - } return 1; } diff --git a/core/scf_operator_dag.c b/core/scf_operator_dag.c index 615d57d..68bb4bc 100644 --- a/core/scf_operator_dag.c +++ b/core/scf_operator_dag.c @@ -433,6 +433,8 @@ scf_dag_operator_t dag_operators[] = {SCF_OP_3AC_SUB_ASSIGN_POINTER, SCF_OP_ASSOCIATIVITY_RIGHT, _scf_dag_op_sub_assign_pointer}, {SCF_OP_3AC_AND_ASSIGN_POINTER, SCF_OP_ASSOCIATIVITY_RIGHT, _scf_dag_op_and_assign_pointer}, {SCF_OP_3AC_OR_ASSIGN_POINTER, SCF_OP_ASSOCIATIVITY_RIGHT, _scf_dag_op_or_assign_pointer}, + {SCF_OP_3AC_INC_POINTER, SCF_OP_ASSOCIATIVITY_RIGHT, _scf_dag_op_inc_pointer}, + {SCF_OP_3AC_DEC_POINTER, SCF_OP_ASSOCIATIVITY_RIGHT, _scf_dag_op_dec_pointer}, {SCF_OP_3AC_ASSIGN_DEREFERENCE, SCF_OP_ASSOCIATIVITY_RIGHT, _scf_dag_op_assign_dereference}, {SCF_OP_3AC_ADD_ASSIGN_DEREFERENCE, SCF_OP_ASSOCIATIVITY_RIGHT, _scf_dag_op_add_assign_dereference}, @@ -483,7 +485,7 @@ int scf_dag_expr_calculate(scf_list_t* h, scf_dag_node_t* node) #endif scf_dag_operator_t* op = scf_dag_operator_find(node->type); if (!op) { - scf_loge("node->type: %d, %d\n", node->type, SCF_OP_3AC_ADD_ASSIGN_DEREFERENCE); + scf_loge("node->type: %d, %d\n", node->type, SCF_OP_3AC_DEC_POINTER); if (node->var && node->var->w) scf_loge("node->var: %s\n", node->var->w->text->data); return -1; diff --git a/core/scf_optimizer_auto_gc_find.c b/core/scf_optimizer_auto_gc_find.c index 45b9767..aba9348 100644 --- a/core/scf_optimizer_auto_gc_find.c +++ b/core/scf_optimizer_auto_gc_find.c @@ -1015,7 +1015,7 @@ static int _auto_gc_function_find(scf_ast_t* ast, scf_function_t* f, scf_list_t* for (i = 0; i < bb->ds_malloced->size; i++) { ds = bb->ds_malloced->data[i]; #if 1 - scf_loge("ds->ret: %u, ds->dag_node->var->arg_flag: %u\n", ds->ret, ds->dag_node->var->arg_flag); + scf_logi("ds->ret: %u, ds->dag_node->var->arg_flag: %u\n", ds->ret, ds->dag_node->var->arg_flag); scf_dn_status_print(ds); printf("\n"); #endif @@ -1031,7 +1031,7 @@ static int _auto_gc_function_find(scf_ast_t* ast, scf_function_t* f, scf_list_t* _bb_find_ds_alias_leak(ds, c, bb, bb_list_head); } } - scf_loge("f: %s *****\n\n", f->node.w->text->data); + scf_logi("f: %s *****\n\n", f->node.w->text->data); return total; } @@ -1119,4 +1119,3 @@ scf_optimizer_t scf_optimizer_auto_gc_find = .flags = SCF_OPTIMIZER_GLOBAL, }; - diff --git a/core/scf_optimizer_basic_block.c b/core/scf_optimizer_basic_block.c index 0e389d8..3e36fb7 100644 --- a/core/scf_optimizer_basic_block.c +++ b/core/scf_optimizer_basic_block.c @@ -237,7 +237,7 @@ static int _optimize_basic_block(scf_ast_t* ast, scf_function_t* f, scf_vector_t if (scf_list_empty(bb_list_head)) return 0; -// scf_logi("------- %s() ------\n", f->node.w->text->data); + scf_logd("------- %s() ------\n", f->node.w->text->data); for (l = scf_list_head(bb_list_head); l != scf_list_sentinel(bb_list_head); l = scf_list_next(l)) { diff --git a/core/scf_optimizer_const_teq.c b/core/scf_optimizer_const_teq.c index 57086f0..0fbe2b7 100644 --- a/core/scf_optimizer_const_teq.c +++ b/core/scf_optimizer_const_teq.c @@ -163,28 +163,30 @@ static int __optimize_const_teq(scf_basic_block_t* bb, scf_function_t* f) l2 = scf_list_head(&bb2->code_list_head); c = scf_list_data(l2, scf_3ac_code_t, list); - if (SCF_OP_3AC_JZ == c->op->type) { + if (!jmp_flag) { + if (SCF_OP_3AC_JZ == c->op->type) { - if (0 == flag) { - c->op = scf_3ac_find_operator(SCF_OP_GOTO); - jmp_flag = 1; - continue; - } + if (0 == flag) { + c->op = scf_3ac_find_operator(SCF_OP_GOTO); + jmp_flag = 1; + continue; + } - } else if (SCF_OP_3AC_JNZ == c->op->type) { + } else if (SCF_OP_3AC_JNZ == c->op->type) { - if (1 == flag) { - c->op = scf_3ac_find_operator(SCF_OP_GOTO); + if (1 == flag) { + c->op = scf_3ac_find_operator(SCF_OP_GOTO); + jmp_flag = 1; + continue; + } + + } else if (SCF_OP_GOTO == c->op->type) { jmp_flag = 1; continue; + } else { + scf_loge("\n"); + return -EINVAL; } - - } else if (SCF_OP_GOTO == c->op->type) { - jmp_flag = 1; - continue; - } else { - scf_loge("\n"); - return -EINVAL; } assert(c->dsts && 1 == c->dsts->size); @@ -200,7 +202,7 @@ static int __optimize_const_teq(scf_basic_block_t* bb, scf_function_t* f) if (jmp_flag && bb2 && !bb2->jmp_flag) { - assert(0 == scf_vector_del(bb2->prevs, bb)); + scf_vector_del(bb2->prevs, bb); } int ret; @@ -247,6 +249,8 @@ static int _optimize_const_teq(scf_ast_t* ast, scf_function_t* f, scf_vector_t* if (scf_list_empty(bb_list_head)) return 0; + scf_logd("------- %s() ------\n", f->node.w->text->data); + for (l = scf_list_head(bb_list_head); l != scf_list_sentinel(bb_list_head); l = scf_list_next(l)) { bb = scf_list_data(l, scf_basic_block_t, list); diff --git a/core/scf_pointer_alias.c b/core/scf_pointer_alias.c index 2ca47bf..fe189ab 100644 --- a/core/scf_pointer_alias.c +++ b/core/scf_pointer_alias.c @@ -168,7 +168,11 @@ static int _bb_pointer_initeds(scf_vector_t* initeds, scf_list_t* bb_list_head, if (ds->dn_indexes) return 0; - scf_loge("pointer '%s' is not inited, file: %s, line: %d\n", v->w->text->data, v->w->file->data, v->w->line); + if (v->tmp_flag) + return 0; + + scf_loge("pointer '%s' is not inited, tmp_flag: %d, local_flag: %d, file: %s, line: %d\n", + v->w->text->data, v->tmp_flag, v->local_flag, v->w->file->data, v->w->line); return SCF_POINTER_NOT_INIT; } @@ -1028,8 +1032,6 @@ static int _pointer_alias_var(scf_vector_t* aliases, scf_dag_node_t* dn_alias, s ds->alias = dn_alias; ds->alias_type = SCF_DN_ALIAS_VAR; - scf_dn_status_print(ds); - ret = scf_vector_add(aliases, ds); if (ret < 0) { scf_dn_status_free(ds); diff --git a/native/x64/scf_x64_inst_util.c b/native/x64/scf_x64_inst_util.c index 8161605..9b197e7 100644 --- a/native/x64/scf_x64_inst_util.c +++ b/native/x64/scf_x64_inst_util.c @@ -709,7 +709,11 @@ scf_instruction_t* _x64_make_inst_SIB(scf_instruction_t* inst, scf_x64_OpCode_t* scf_ModRM_setReg(&ModRM, reg); scf_ModRM_setRM(&ModRM, SCF_X64_RM_SIB); - if (SCF_X64_RM_EBP != r_base->id && SCF_X64_RM_ESP != r_base->id && 0 == disp) + if (SCF_X64_RM_EBP != r_base->id + && SCF_X64_RM_ESP != r_base->id + && SCF_X64_RM_R12 != r_base->id + && SCF_X64_RM_R13 != r_base->id + && 0 == disp) scf_ModRM_setMod(&ModRM, SCF_X64_MOD_BASE); else { if (disp <= 127 && disp >= -128) @@ -742,7 +746,11 @@ scf_instruction_t* _x64_make_inst_SIB(scf_instruction_t* inst, scf_x64_OpCode_t* }; inst->code[inst->len++] = SIB; - if (SCF_X64_RM_EBP == r_base->id || SCF_X64_RM_ESP == r_base->id || 0 != disp) { + if (SCF_X64_RM_EBP == r_base->id + || SCF_X64_RM_ESP == r_base->id + || SCF_X64_RM_R12 == r_base->id + || SCF_X64_RM_R13 == r_base->id + || 0 != disp) { if (disp <= 127 && disp >= -128) inst->code[inst->len++] = (int8_t)disp; diff --git a/parse/scf_operator_handler_semantic.c b/parse/scf_operator_handler_semantic.c index 265db38..d921c00 100644 --- a/parse/scf_operator_handler_semantic.c +++ b/parse/scf_operator_handler_semantic.c @@ -2035,6 +2035,58 @@ static int _scf_op_semantic_bit_not(scf_ast_t* ast, scf_node_t** nodes, int nb_n return 0; } +static int _semantic_pointer_add(scf_ast_t* ast, scf_node_t* parent, scf_node_t* pointer, scf_node_t* index) +{ + scf_variable_t* r; + scf_variable_t* v = _scf_operand_get(pointer); + scf_type_t* t = NULL; + scf_node_t* add; + + int ret = scf_ast_find_type_type(&t, ast, v->type); + if (ret < 0) + return ret; + + add = scf_node_alloc(parent->w, SCF_OP_ARRAY_INDEX, NULL); + if (!add) + return -ENOMEM; + + r = SCF_VAR_ALLOC_BY_TYPE(parent->w, t, v->const_flag, scf_variable_nb_pointers(v), v->func_ptr); + if (!r) { + scf_node_free(add); + return -ENOMEM; + } + r->local_flag = 1; + r->tmp_flag = 1; + + add->result = r; + r = NULL; + + ret = scf_node_add_child(add, pointer); + if (ret < 0) { + scf_node_free(add); + return ret; + } + + ret = scf_node_add_child(add, index); + if (ret < 0) { + pointer->parent = parent; + + add->nb_nodes = 0; + scf_node_free(add); + return ret; + } + + add->parent = parent; + + parent->nodes[0] = add; + parent->nodes[1] = NULL; + parent->nb_nodes = 1; + + parent->op = scf_find_base_operator_by_type(SCF_OP_ADDRESS_OF); + parent->type = SCF_OP_ADDRESS_OF; + return 0; +} + static int _scf_op_semantic_binary(scf_ast_t* ast, scf_node_t** nodes, int nb_nodes, void* data) { assert(2 == nb_nodes); @@ -2068,10 +2120,11 @@ static int _scf_op_semantic_binary(scf_ast_t* ast, scf_node_t** nodes, int nb_no scf_variable_t* v2 = NULL; scf_type_t* t = NULL; - int const_flag = 0; - int nb_pointers = 0; - int nb_pointers0 = scf_variable_nb_pointers(v0); - int nb_pointers1 = scf_variable_nb_pointers(v1); + int const_flag = 0; + int nb_pointers = 0; + int nb_pointers0 = scf_variable_nb_pointers(v0); + int nb_pointers1 = scf_variable_nb_pointers(v1); + int add_flag = 0; if (nb_pointers0 > 0) { @@ -2099,6 +2152,14 @@ static int _scf_op_semantic_binary(scf_ast_t* ast, scf_node_t** nodes, int nb_no scf_loge("add type cast failed\n"); return ret; } + + if (SCF_OP_ADD == parent->type || SCF_OP_SUB == parent->type) { + + ret = _semantic_pointer_add(ast, parent, nodes[0], nodes[1]); + if (ret < 0) + return ret; + add_flag = 1; + } } t = NULL; @@ -2115,7 +2176,13 @@ static int _scf_op_semantic_binary(scf_ast_t* ast, scf_node_t** nodes, int nb_no if (!scf_variable_integer(v0)) { scf_loge("var calculated with a pointer should be a interger\n"); return -EINVAL; + } else { + if (SCF_OP_SUB == parent->type) { + scf_loge("only a pointer sub an integer, NOT reverse, file: %s, line: %d\n", parent->w->file->data, parent->w->line); + return -1; + } + t = scf_block_find_type_type(ast->current_block, SCF_VAR_UINTPTR); v2 = SCF_VAR_ALLOC_BY_TYPE(v0->w, t, v0->const_flag, 0, NULL); @@ -2128,6 +2195,14 @@ static int _scf_op_semantic_binary(scf_ast_t* ast, scf_node_t** nodes, int nb_no scf_loge("add type cast failed\n"); return ret; } + + if (SCF_OP_ADD == parent->type) { + ret = _semantic_pointer_add(ast, parent, nodes[1], nodes[0]); + if (ret < 0) + return ret; + + add_flag = 1; + } } t = NULL; @@ -2176,12 +2251,12 @@ static int _scf_op_semantic_binary(scf_ast_t* ast, scf_node_t** nodes, int nb_no func_ptr = NULL; } - scf_lex_word_t* w = nodes[0]->parent->w; + scf_lex_word_t* w = parent->w; scf_variable_t* r = SCF_VAR_ALLOC_BY_TYPE(w, t, const_flag, nb_pointers, func_ptr); - if (!r) { - scf_loge("var alloc failed\n"); + if (!r) return -ENOMEM; - } + + r->tmp_flag = add_flag; *d->pret = r; return 0; -- 2.25.1