From: yu.dongliang <18588496441@163.com> Date: Fri, 24 Oct 2025 04:29:25 +0000 (+0800) Subject: js: support PCRE2 for RegExpr match(), scf: fix some bugs X-Git-Url: http://baseworks.info/?a=commitdiff_plain;h=02879087ee659369c76e2011854798d3d90bcd89;p=abc.git js: support PCRE2 for RegExpr match(), scf: fix some bugs --- diff --git a/examples/js.html b/examples/js.html index 7a8bb36..4c21780 100644 --- a/examples/js.html +++ b/examples/js.html @@ -8,9 +8,9 @@ diff --git a/js/Makefile b/js/Makefile index dc6778f..b4d00fb 100644 --- a/js/Makefile +++ b/js/Makefile @@ -13,6 +13,7 @@ CFLAGS += -I../js/native CFLAGS += -I../js/native/x64 CFLAGS += -I../js/native/risc +LDFLAGS += `pkg-config --cflags --libs libpcre2-8` LDFLAGS += -ldl -lm -pthread all: diff --git a/js/abc_libjs.c b/js/abc_libjs.c index a063ccd..5532441 100644 --- a/js/abc_libjs.c +++ b/js/abc_libjs.c @@ -1,4 +1,6 @@ #include"abc_html.h" +#define PCRE2_CODE_UNIT_WIDTH 8 +#include static char* js_days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" @@ -18,7 +20,7 @@ int64_t abc_js_date() return ts.tv_sec * 1000 + ts.tv_nsec / 1000000; } -void abc_js_date_toString(char* s, int max, int64_t msec) +int abc_js_date_toString(char* s, int max, int64_t msec) { struct tm tm; @@ -29,7 +31,7 @@ void abc_js_date_toString(char* s, int max, int64_t msec) int t0 = msec % (24 * 3600); int t1 = tm.tm_hour * 3600 + tm.tm_min * 60 + tm.tm_sec; - snprintf(s, max, "%s, %s %02d %d %02d:%02d:%02d GMT+%02d00", + return snprintf(s, max, "%s, %s %02d %d %02d:%02d:%02d GMT+%02d00", js_days[tm.tm_wday], js_mons[tm.tm_mon], tm.tm_mday, @@ -38,7 +40,7 @@ void abc_js_date_toString(char* s, int max, int64_t msec) tm.tm_min, tm.tm_sec, (t1 - t0) / 3600); -// strftime(s, max, "%A, %Y-%m-%d %T GMT%z", &tm); + // strftime(s, max, "%A, %Y-%m-%d %T GMT%z", &tm); } int abc_html_write(abc_html_t* html, const char* s) @@ -153,3 +155,263 @@ int abc_html_write_d(abc_html_t* html, double d) return abc_html_write(html, buf); } + +int 
abc_pcre2_match(int** __ovector, const char* __subject, const char* __pattern) /* Matches __subject against __pattern, which is laid out as: optional leading slashes, the regular expression, a slash, then modifier letters (g, i, m). On success the return value is the number of (start, end) offset pairs and *__ovector receives a malloc-ed int array holding them, which the caller releases with free(). Returns -1 when compilation fails, nothing matches or matching fails, and -ENOMEM when the result array cannot be allocated. */ +{ + pcre2_code *re; + PCRE2_SPTR pattern = (PCRE2_SPTR)__pattern; + PCRE2_SPTR subject = (PCRE2_SPTR)__subject; + PCRE2_SIZE subject_len = (PCRE2_SIZE)strlen(__subject); + + int crlf_is_newline; + int errornumber; + int find_all = 0; + int i; + int rc; + int utf8; + + uint32_t option_bits = 0; + uint32_t newline; + + PCRE2_SIZE erroroffset; + PCRE2_SIZE *ovector; + + pcre2_match_data *match_data; + + /* skip any leading slash delimiters */ while ('/' == *pattern) + pattern++; + + /* the expression ends at the next slash or at the end of the string; an escaped slash inside the expression is not recognised and ends it early */ PCRE2_SPTR modifier = pattern; + while (*modifier && '/' != *modifier) + modifier++; + + PCRE2_SIZE pattern_len = (PCRE2_SIZE)(modifier - pattern); + + printf("pattern: %.*s\n", (int)pattern_len, (char *)pattern); + printf("modifier: %s\n", modifier); + + /* when a separating slash was found, modifier still points at it here; it and any unknown letters fall into the default case */ while (*modifier) { + switch (*modifier) { + case 'g': + find_all = 1; + break; + case 'i': + option_bits |= PCRE2_CASELESS; + break; + case 'm': + option_bits |= PCRE2_MULTILINE; + break; + default: + break; + }; + + modifier++; + } + + re = pcre2_compile( + pattern, /* start of the expression, after any leading slashes */ + pattern_len, /* explicit length of the expression, up to the separating slash */ + option_bits, /* PCRE2_CASELESS and PCRE2_MULTILINE as selected by the modifiers */ + &errornumber, /* for error number */ + &erroroffset, /* for error offset */ + NULL); /* use default compile context */ + if (!re) { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); + + scf_loge("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, buffer); + return -1; + } + + match_data = pcre2_match_data_create_from_pattern(re, NULL); /* NOTE(review): the result is used below without a NULL check */ + + rc = pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_len, /* the length of the subject */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + if (rc < 0) { /* no match and real matching errors are both reported to the caller as -1 */ + if (PCRE2_ERROR_NOMATCH == rc) + scf_logi("No match\n"); + else + scf_loge("Matching error %d\n", rc); + +
pcre2_match_data_free(match_data); + pcre2_code_free(re); + return -1; + } + + ovector = pcre2_get_ovector_pointer(match_data); + printf("Match succeeded at offset %d\n", (int)ovector[0]); + + if (0 == rc) + printf("ovector was not big enough for all the captured substrings\n"); + + if (ovector[0] > ovector[1]) + { + printf("\\K was used in an assertion to set the match start after its end.\n" + "From end to start the match was: %.*s\n", + (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\n"); + + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return -1; + } + + int* outs = malloc(rc * sizeof(int) * 2); /* two ints, start and end offset, for each of the rc entries reported by pcre2_match */ + if (!outs) { + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return -ENOMEM; + } + + for (i = 0; i < rc; i++) + { + PCRE2_SPTR str = subject + ovector[2 * i]; + PCRE2_SIZE len = ovector[2 * i + 1] - ovector[2 * i]; + + printf("%2d: %.*s\n", i, (int)len, (char *)str); + + outs[2 * i ] = ovector[2 * i ]; /* PCRE2_SIZE offsets are narrowed to int here */ + outs[2 * i + 1] = ovector[2 * i + 1]; + } + + if (!find_all) { /* no g modifier: the pairs of the first match are the whole result */ + pcre2_match_data_free(match_data); + pcre2_code_free(re); + + *__ovector = outs; + return rc; + } + + int n_outs = rc; + + /* g modifier: fetch the compiled options and newline convention, which decide how far to step after an empty match */ pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits); + pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); + + utf8 = (option_bits & PCRE2_UTF) != 0; + crlf_is_newline = (PCRE2_NEWLINE_ANY == newline || PCRE2_NEWLINE_CRLF == newline || PCRE2_NEWLINE_ANYCRLF == newline); + + for (;;) { /* each pass looks for the next match after the previous one */ + uint32_t options = 0; + PCRE2_SIZE start_offset = ovector[1]; + + if (ovector[0] == ovector[1]) { /* the previous match was empty */ + if (ovector[0] == subject_len) + break; + options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; /* retry at the same offset, refusing another empty match there */ + } else { + PCRE2_SIZE startchar = pcre2_get_startchar(match_data); + + if (start_offset <= startchar) { /* the previous match ended at or before its start character: restart one past that character so the loop advances */ + if (startchar >= subject_len) + break; + + start_offset = startchar + 1; + if (utf8) { /* skip UTF-8 continuation bytes so the restart is on a character boundary */ + for (; start_offset < subject_len; start_offset++) + if ((subject[start_offset] & 0xc0) != 0x80) + break; + } + } + } + + rc =
pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_len, /* the length of the subject */ + start_offset, /* starting offset in the subject */ + options, /* 0, or the anchored not-empty flags chosen above */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + + if (PCRE2_ERROR_NOMATCH == rc) { + if (options == 0) /* All matches found */ + break; + + /* the anchored retry after an empty match failed: step forward and search normally on the next pass */ ovector[1] = start_offset + 1; /* Advance one code unit */ + + if (crlf_is_newline + && start_offset < subject_len - 1 + && '\r' == subject[start_offset] + && '\n' == subject[start_offset + 1]) + ovector[1] += 1; /* a CR LF pair is stepped over as one unit */ + else if (utf8) { + while (ovector[1] < subject_len) + { + if ((subject[ovector[1]] & 0xc0) != 0x80) + break; + ovector[1] += 1; + } + } + + continue; + } + + if (rc < 0) { + scf_loge("Matching error %d\n", rc); + + pcre2_match_data_free(match_data); + pcre2_code_free(re); + free(outs); + return -1; + } + + printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]); + + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\n"); + + if (ovector[0] > ovector[1]) + { + printf("\\K was used in an assertion to set the match start after its end.\n" + "From end to start the match was: %.*s\n", + (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\n"); + + pcre2_match_data_free(match_data); + pcre2_code_free(re); + free(outs); + return -1; + } + + void* p = realloc(outs, (n_outs + rc) * sizeof(int) * 2); /* room for rc more pairs; outs is still owned here if this fails */ + if (!p) { + pcre2_match_data_free(match_data); + pcre2_code_free(re); + free(outs); + return -ENOMEM; + } + outs = p; + + for (i = 0; i < rc; i++) + { + PCRE2_SPTR str = subject + ovector[2 * i]; + size_t len = ovector[2 * i + 1] - ovector[2 * i]; + + printf("%2d: %.*s\n", i, (int)len, (char*)str); + + outs[2 * (n_outs + i) ] = ovector[2 * i ]; /* all rc pairs of this match are appended after the earlier ones */ + outs[2 * (n_outs + i) + 1] = ovector[2 * i + 1]; + } + + n_outs += rc; + } + + printf("\n"); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + + for
(i = 0; i < n_outs; i++) + scf_logi("i: %d, %d,%d\n", i, outs[2 * i], outs[2 * i + 1]); + + + *__ovector = outs; + return n_outs; +} diff --git a/js/abc_libjs.so b/js/abc_libjs.so index a7677e2..5dade4e 100755 Binary files a/js/abc_libjs.so and b/js/abc_libjs.so differ diff --git a/js/core/scf_dag.c b/js/core/scf_dag.c index fb92226..9fa2005 100644 --- a/js/core/scf_dag.c +++ b/js/core/scf_dag.c @@ -317,7 +317,7 @@ void scf_dn_status_print(scf_dn_status_t* ds) } } - printf(" alias_type: %d\n", ds->alias_type); + printf(" alias_type: %d, ret_flag: %d, ret_index: %d\n", ds->alias_type, ds->ret_flag, ds->ret_index); } scf_dag_node_t* scf_dag_node_alloc(int type, scf_variable_t* var, const scf_node_t* node) @@ -581,8 +581,11 @@ int scf_dag_node_same(scf_dag_node_t* dn, const scf_node_t* node) } if (!dn->childs) { - if (SCF_OP_CALL == node->type && 1 == node->nb_nodes) - return __dn_same_call(dn, node, split); + if (SCF_OP_CALL == node->type /*&& 1 == node->nb_nodes*/) { + int ret = __dn_same_call(dn, node, split); + return ret; + } + return 0; } @@ -682,8 +685,10 @@ cmp_childs: } } - if (SCF_OP_CALL == dn->type) - return __dn_same_call(dn, node, split); + if (SCF_OP_CALL == dn->type) { + int ret = __dn_same_call(dn, node, split); + return ret; + } return 1; } diff --git a/js/core/scf_optimizer.c b/js/core/scf_optimizer.c index b6b4ed6..30c7918 100644 --- a/js/core/scf_optimizer.c +++ b/js/core/scf_optimizer.c @@ -115,7 +115,7 @@ int scf_optimize(scf_ast_t* ast, scf_vector_t* functions) if (!f->node.define_flag) continue; - if (strcmp(f->node.w->text->data, "P")) + if (strcmp(f->node.w->text->data, "__toString")) continue; printf("\n"); diff --git a/js/core/scf_optimizer_auto_gc_find.c b/js/core/scf_optimizer_auto_gc_find.c index b60c0a1..2c5cf66 100644 --- a/js/core/scf_optimizer_auto_gc_find.c +++ b/js/core/scf_optimizer_auto_gc_find.c @@ -240,6 +240,9 @@ static int _bb_add_ds_for_ret(scf_basic_block_t* bb, scf_dn_status_t* ds_obj, sc if (!ds2->ret_flag) continue; + 
if (ds2->dag_node->var->type != ds_obj->dag_node->var->type) + continue; + __bb_add_ds_append(bb, ds_obj, bb2, ds2); } diff --git a/js/core/scf_optimizer_inline.c b/js/core/scf_optimizer_inline.c index 233fe19..f710578 100644 --- a/js/core/scf_optimizer_inline.c +++ b/js/core/scf_optimizer_inline.c @@ -378,6 +378,7 @@ static int _optimize_inline2(scf_ast_t* ast, scf_function_t* f) int n_calls = 0; bb_cur = bb; + bb_cur->call_flag = 0; for (l2 = scf_list_head(&bb->code_list_head); l2 != scf_list_sentinel(&bb->code_list_head); ) { c = scf_list_data(l2, scf_3ac_code_t, list); @@ -398,22 +399,22 @@ static int _optimize_inline2(scf_ast_t* ast, scf_function_t* f) src = c->srcs->data[0]; v = _scf_operand_get(src->node); - if (!v->const_literal_flag) + if (!v->const_literal_flag) { + bb_cur->call_flag |= n_calls > 0; continue; + } f2 = v->func_ptr; - if (!f2->node.define_flag) - continue; - - if (!f2->inline_flag) - continue; - - if (f2->vargs_flag) + if (!f2->node.define_flag || !f2->inline_flag || f2->vargs_flag) { + bb_cur->call_flag |= n_calls > 0; continue; + } - if (f2->nb_basic_blocks > 10) + if (f2->nb_basic_blocks > 10) { + bb_cur->call_flag |= n_calls > 0; continue; + } #if 1 bb2 = bb_cur; bb_cur->call_flag = 0; @@ -436,8 +437,6 @@ static int _optimize_inline2(scf_ast_t* ast, scf_function_t* f) } #endif } - - bb_cur->call_flag |= n_calls > 0; } #if 0 diff --git a/js/doc.c b/js/doc.c index 9821bd1..5b6dfdd 100644 --- a/js/doc.c +++ b/js/doc.c @@ -6,8 +6,10 @@ int abc_html_write (Object* html, const char* s); int abc_html_write_i(Object* html, int64_t i); int abc_html_write_d(Object* html, double d); +int abc_pcre2_match(int** __ovector, const char* __subject, const char* __pattern); + int64_t abc_js_date(); -void abc_js_date_toString(char* s, int max, int64_t msec); +int abc_js_date_toString(char* s, int max, int64_t msec); enum { JS_Undef, @@ -16,6 +18,7 @@ enum { JS_Object, JS_Date, JS_Boolean, + JS_Array, }; struct Object @@ -189,10 +192,50 @@ struct Object 
return res; } - const char* toString(Object* this) + /* Number of bytes, terminator included, that toString reserves for the text __toString writes for this object. */ int __toStringLen(Object* this) + { + int len; + int type = JS_Undef; + + if (this) + type = this->type; + + switch (type) { + case JS_Boolean: + len = 7; /* fixed budget covering both true and false */ + break; + + case JS_Number: + len = 127; /* fixed budget for the %lg rendering */ + break; + + case JS_Date: + len = 127; /* same limit that __toString passes to abc_js_date_toString */ + break; + + case JS_String: + len = strlen(this->str); + break; + + case JS_Object: + len = strlen("[object Object]"); + break; + + case JS_Array: + len = strlen("[array Array]"); /* __toString renders an array as this fixed text */ + break; + + default: + len = strlen("undefined"); + break; + }; + + return len + 1; /* plus the terminator */ + } + + /* Writes the text form of this object into s and returns its length as reported by sprintf, strlen or abc_js_date_toString. s is expected to hold at least __toStringLen() bytes. */ int __toString(Object* this, char* s) { char* p; - char* s; int len; int type = JS_Undef; @@ -201,64 +244,90 @@ struct Object switch (type) { case JS_Boolean: - s = scf__auto_malloc(8); - if (!s) - return NULL; - if (this->i64) - sprintf(s, "true"); + len = sprintf(s, "true"); else - sprintf(s, "false"); + len = sprintf(s, "false"); break; case JS_Number: - s = scf__auto_malloc(128); - if (!s) - return NULL; + len = sprintf(s, "%lg", this->d); + break; - snprintf(s, 127, "%lg", this->d); + case JS_Date: + len = abc_js_date_toString(s, 127, this->i64); break; case JS_String: len = strlen(this->str); - - s = scf__auto_malloc(len + 1); - if (!s) - return NULL; - memcpy(s, this->str, len + 1); break; case JS_Object: p = "[object Object]"; len = strlen(p); - - s = scf__auto_malloc(len + 1); - if (!s) - return NULL; - memcpy(s, p, len + 1); break; - case JS_Date: - s = scf__auto_malloc(128); - if (!s) - return NULL; - - abc_js_date_toString(s, 127, this->i64); + case JS_Array: + p = "[array Array]"; + len = strlen(p); + memcpy(s, p, len + 1); break; + default: p = "undefined"; len = strlen(p); - - s = scf__auto_malloc(len + 1); - if (!s) - return NULL; - memcpy(s, p, len + 1); break; }; + return len; + } + + /* Returns the text form of this object in a buffer obtained from scf__auto_malloc, or NULL when that allocation fails. An array is rendered as the text of its members joined by a comma and a space. */ const char* toString(Object* this) + { + Object* obj; /* not referenced in the lines of this function shown here */ + char* s; + int len; + int type = JS_Undef; + int i; + int n; + + if (this) + type = this->type; + + if (JS_Array == type) { + len = 0; + for (i = 0; i <
this->length; i++) { + n = this->members[i]->__toStringLen(); + len += n + 1; /* member size, terminator included, plus one more byte; together these leave room for the separators */ + } + + s = scf__auto_malloc(len); /* NOTE(review): for an array of length 0 this requests 0 bytes and nothing is written to s below - confirm callers cope */ + if (!s) + return NULL; + + len = 0; + for (i = 0; i < this->length; i++) { + n = this->members[i]->__toString(s + len); /* append at the current end of the text */ + len += n; + + if (i < this->length - 1) { /* separator after every member except the last */ + sprintf(s + len, ", "); + len += 2; + } + } + } else { + len = this->__toStringLen(); + + s = scf__auto_malloc(len); + if (!s) + return NULL; + + this->__toString(s); + } + return s; } @@ -324,6 +393,83 @@ struct Object Object* res = new Object(sqrt(obj->d)); return res; } + + /* Creates a new Object from the pattern string. */ Object* RegExpr(Object* this, const char* pattern) + { + Object* res = new Object(pattern); + return res; + } + + /* Builds the text pattern, slash, modifiers and creates a new Object from it; returns NULL if the temporary buffer cannot be allocated. */ Object* RegExpr(Object* this, const char* pattern, const char* modifiers) + { + int n0 = strlen(pattern); + int n1 = strlen(modifiers); + + char* s = malloc(n0 + n1 + 2); /* both strings plus the slash and the terminator */ + if (!s) + return NULL; + sprintf(s, "%s/%s", pattern, modifiers); + + Object* res = new Object(s); + + free(s); /* freed right after construction, so the constructor presumably copies the text - TODO confirm */ + return res; + } + + /* Object-argument overloads: forward the str fields to the string versions above. */ Object* RegExpr(Object* this, Object* pattern) + { + return this->RegExpr(pattern->str); + } + Object* RegExpr(Object* this, Object* pattern, Object* modifiers) + { + return this->RegExpr(pattern->str, modifiers->str); + } + + /* Returns a new JS_String Object holding the bytes of this->str from offset j0 up to, not including, j1. */ Object* substr(Object* this, int j0, int j1) + { + Object* res = new Object(); + if (res) { + int len = j1 - j0; /* NOTE(review): j0 and j1 are not range-checked */ + + res->str = scf__auto_malloc(len + 1); + if (res->str) { + memcpy(res->str, this->str + j0, len); /* NOTE(review): no terminator is stored after the copied bytes; presumably scf__auto_malloc returns zeroed memory - confirm */ + } + + res->type = JS_String; /* set even when the buffer allocation failed */ + } + return res; + } + + /* Runs abc_pcre2_match on this->str and returns an Object of type JS_Array whose members are the matched substrings, or NULL when abc_pcre2_match returns a negative value. */ Object* match(Object* this, const char* pattern) + { + int* ovector = NULL; + + int rc = abc_pcre2_match(&ovector, this->str, pattern); + if (rc < 0) + return NULL; + + Object* res = new Object("matches", rc); /* presumably creates rc member slots; the constructor is not visible here - confirm */ + if (res) { + int i; + for (i = 0; i < rc; i++) { /* ovector holds rc pairs of start and end offsets */ + int j0 = ovector[2 * i]; + int j1 = ovector[2 * i + 1]; + + res->members[i] = this->substr(j0, j1); /* stored without a NULL check */ + } + + res->type = JS_Array; + } + + free(ovector); /* the offset array was allocated by abc_pcre2_match */ + return res; + } + + Object* match(Object* this, Object* pattern) + { + return
this->match(pattern->str); + } }; const double Math_PI = 3.1415926; @@ -338,6 +484,9 @@ void Object_func_arguments(Object* this, int i, Object* arg) int Object_array_realloc(Object* this, int i) { + if (JS_Object != this->type) + return 0; + if (i < this->length) return 0; diff --git a/js/elf/scf_elf_link.c b/js/elf/scf_elf_link.c index 719f8d7..ba62981 100644 --- a/js/elf/scf_elf_link.c +++ b/js/elf/scf_elf_link.c @@ -1003,7 +1003,7 @@ static int link_relas(scf_elf_file_t* exec, char* afiles[], int nb_afiles, char* return -1; } } - +#if 0 for (i = 0; i < exec->dyn_needs->size; i++) { so = exec->dyn_needs->data[i]; @@ -1033,6 +1033,7 @@ static int link_relas(scf_elf_file_t* exec, char* afiles[], int nb_afiles, char* scf_vector_add_unique(exec->dyn_needs, so2); } } +#endif return 0; } diff --git a/js/native/x64/scf_x64_peephole.c b/js/native/x64/scf_x64_peephole.c index 7a1d856..57f906c 100644 --- a/js/native/x64/scf_x64_peephole.c +++ b/js/native/x64/scf_x64_peephole.c @@ -339,7 +339,7 @@ static int x64_inst_is_useful(scf_instruction_t* inst, scf_instruction_t* std) if (x64_inst_data_is_reg(&inst->dst)) { scf_register_t* r0 = inst->dst.base; - scf_register_t* r1 = std->src.base; + scf_register_t* r1; if (SCF_X64_CALL == std->OpCode->type) { @@ -352,15 +352,25 @@ static int x64_inst_is_useful(scf_instruction_t* inst, scf_instruction_t* std) return 1; } else { + r1 = std->src.base; if (x64_inst_data_is_reg(&std->src)) { if (X64_COLOR_CONFLICT(r0->color, r1->color)) return 1; } - if (std->src.base == inst->dst.base - || std->src.index == inst->dst.base - || std->dst.index == inst->dst.base - || std->dst.base == inst->dst.base) + if (r1 && X64_COLOR_CONFLICT(r1->color, r0->color)) + return 1; + + r1 = std->src.index; + if (r1 && X64_COLOR_CONFLICT(r1->color, r0->color)) + return 1; + + r1 = std->dst.index; + if (r1 && X64_COLOR_CONFLICT(r1->color, r0->color)) + return 1; + + r1 = std->dst.base; + if (r1 && X64_COLOR_CONFLICT(r1->color, r0->color)) return 1; } @@ -502,9 
+512,6 @@ static int _x64_peephole_function(scf_vector_t* tmp_insts, scf_function_t* f) assert(0 == scf_vector_del(c->instructions, inst)); assert(0 == scf_vector_del(tmp_insts, inst)); -// scf_logd("del: \n"); -// scf_instruction_print(inst); - free(inst); inst = NULL; } diff --git a/js/native/x64/scf_x64_reg.c b/js/native/x64/scf_x64_reg.c index ae6033b..36b1bd7 100644 --- a/js/native/x64/scf_x64_reg.c +++ b/js/native/x64/scf_x64_reg.c @@ -1221,24 +1221,25 @@ int x64_array_index_reg(x64_sib_t* sib, scf_dag_node_t* base, scf_dag_node_t* in if (ri->bytes < ri2->bytes) { if (scf_variable_signed(index->var)) { - mov = x64_find_OpCode(SCF_X64_MOVSX, ri->bytes, ri2->bytes, SCF_X64_E2G); + mov = x64_find_OpCode(SCF_X64_MOVSX, ri->bytes, ri2->bytes, SCF_X64_E2G); + inst = x64_make_inst_E2G(mov, ri2, ri); + X64_INST_ADD_CHECK(c->instructions, inst); } else if (ri->bytes <= 2) { - mov = x64_find_OpCode(SCF_X64_MOVZX, ri->bytes, ri2->bytes, SCF_X64_E2G); + mov = x64_find_OpCode(SCF_X64_MOVZX, ri->bytes, ri2->bytes, SCF_X64_E2G); + inst = x64_make_inst_E2G(mov, ri2, ri); + X64_INST_ADD_CHECK(c->instructions, inst); } else { assert(4 == ri->bytes); - +/* xor = x64_find_OpCode(SCF_X64_XOR, 8, 8, SCF_X64_G2E); inst = x64_make_inst_G2E(xor, ri2, ri2); X64_INST_ADD_CHECK(c->instructions, inst); - - mov = x64_find_OpCode(SCF_X64_MOV, 4, 4, SCF_X64_E2G); + mov = x64_find_OpCode(SCF_X64_MOV, 4, 4, SCF_X64_E2G); +*/ } - inst = x64_make_inst_E2G(mov, ri2, ri); - X64_INST_ADD_CHECK(c->instructions, inst); - ri = ri2; }