add Naja float instructions
author yu.dongliang <18588496441@163.com>
Fri, 31 Mar 2023 08:19:24 +0000 (16:19 +0800)
committer yu.dongliang <18588496441@163.com>
Fri, 31 Mar 2023 08:19:24 +0000 (16:19 +0800)
native/risc/scf_naja.c
vm/scf_vm.h
vm/scf_vm_naja.c
vm/scf_vm_naja_asm.c
vm/scf_vm_test.c

index 63f1565d8763f7fea093470dc916923fc2fa89a7..5f26d56ee6bdbeebdeeeb2d4f712580c84bd67cd 100644 (file)
@@ -212,6 +212,11 @@ int naja_inst_M2G(scf_3ac_code_t* c, scf_function_t* f, scf_register_t* rd, scf_
        if (rd->bytes > size && scf_variable_signed(vs))
                opcode |= 0x1 << 19;
 
+       else if (scf_variable_float(vs) && 4 == size)
+               opcode |= 0x1 << 19;
+
+       scf_loge("SIZE: %d, size: %d\n", SIZE, size);
+
        opcode |= (rd->id << 21) | SIZE << 17;
        opcode |= RISC_COLOR_TYPE(rd->color) << 30;
 
@@ -321,6 +326,9 @@ int naja_inst_G2M(scf_3ac_code_t* c, scf_function_t* f, scf_register_t* rs, scf_
        opcode |= (rs->id << 21) | SIZE << 17;
        opcode |= RISC_COLOR_TYPE(rs->color) << 30;
 
+       if (scf_variable_float(vs) && 4 == size)
+               opcode |= (1 << 19);
+
        inst    = risc_make_inst(c, opcode);
        RISC_INST_ADD_CHECK(c->instructions, inst);
 
@@ -1350,33 +1358,33 @@ int naja_cmp_update(scf_3ac_code_t* c, scf_function_t* f, scf_instruction_t* cmp
 
        switch (cmp->code[3]) {
 
-               case 0x14:  // imm
-                       i0   = opcode & 0x1f;
-                       r0   = risc_find_register_type_id_bytes(0, i0, 8);
-                       inst = f->iops->MOV_G(c, r16, r0);  // use r16 to backup r0
-                       RISC_INST_ADD_CHECK(c->instructions, inst);
-
-                       opcode &= ~0x1f;
-                       opcode |=  0x10;
-                       break;
+               case 0x24:
+                       if (cmp->code[2] & 0x10) {
+                               i0   = opcode & 0x1f;
+                               r0   = risc_find_register_type_id_bytes(0, i0, 8);
+                               inst = f->iops->MOV_G(c, r16, r0);  // use r16 to backup r0
+                               RISC_INST_ADD_CHECK(c->instructions, inst);
 
-               case 0x24:  // register
-                       i0   =  opcode & 0x1f;
-                       i1   = (opcode >> 5) & 0x1f;
+                               opcode &= ~0x1f;
+                               opcode |=  0x10;
+                       } else {
+                               i0   =  opcode & 0x1f;
+                               i1   = (opcode >> 5) & 0x1f;
 
-                       r0   = risc_find_register_type_id_bytes(0, i0, 8);
-                       inst = f->iops->MOV_G(c, r16, r0);  // use r16 to backup r0
-                       RISC_INST_ADD_CHECK(c->instructions, inst);
+                               r0   = risc_find_register_type_id_bytes(0, i0, 8);
+                               inst = f->iops->MOV_G(c, r16, r0);  // use r16 to backup r0
+                               RISC_INST_ADD_CHECK(c->instructions, inst);
 
-                       r0   = risc_find_register_type_id_bytes(0, i1, 8);
-                       inst = f->iops->MOV_G(c, r17, r0);  // use r17 to backup r1
-                       RISC_INST_ADD_CHECK(c->instructions, inst);
+                               r0   = risc_find_register_type_id_bytes(0, i1, 8);
+                               inst = f->iops->MOV_G(c, r17, r0);  // use r17 to backup r1
+                               RISC_INST_ADD_CHECK(c->instructions, inst);
 
-                       opcode &= ~0x1f;
-                       opcode |=  0x10;
+                               opcode &= ~0x1f;
+                               opcode |=  0x10;
 
-                       opcode &= ~(0x1f << 5);
-                       opcode |=  (0x11 << 5);
+                               opcode &= ~(0x1f << 5);
+                               opcode |=  (0x11 << 5);
+                       }
                        break;
                default:
                        scf_loge("%#x\n", opcode);
index 23ea753277d8e9043c28ad91afadc38d65c10e9d..da0b7dc1ca53ee16d0f46cd640f5216321c902b9 100644 (file)
@@ -4,6 +4,16 @@
 #include"scf_elf.h"
 #include<dlfcn.h>
 
+// Instruction-trace output of the Naja VM: change the condition to 0 to silence it.
+#if 1
+#define NAJA_PRINTF   printf
+#else
+// Function-like no-op: a disabled trace call becomes an empty statement
+// and its arguments are neither evaluated nor compiled as a comma expression.
+#define NAJA_PRINTF(...)   ((void)0)
+#endif
+
+// Indices into the VM's regs[] array for the registers named FP, LR and SP.
+#define NAJA_REG_FP   29
+#define NAJA_REG_LR   30
+#define NAJA_REG_SP   31
+
 typedef struct scf_vm_s       scf_vm_t;
 typedef struct scf_vm_ops_s   scf_vm_ops_t;
 
@@ -47,8 +57,18 @@ struct scf_vm_ops_s
 #define  SCF_VM_LE  4
 #define  SCF_VM_LT  5
 
+// 32-byte (256-bit) value whose storage can be viewed as bytes, 16/32/64-bit
+// unsigned integers, floats or doubles; all members overlay the same memory.
+typedef union {
+       uint8_t  b[32];
+       uint16_t w[16];
+       uint32_t l[8];
+       uint64_t q[4];
+       float    f[8];
+       double   d[4];
+} fv256_t;
+
 typedef struct {
        uint64_t  regs[32];
+       fv256_t   fvec[32];
 
        uint64_t  ip;
        uint64_t  flags;
index 9656fa4cff2fc9a5a7db49bf5ea2bac1974c0253..1641d96ff99dcf36fb0e8da812987837e937e942 100644 (file)
@@ -1,23 +1,27 @@
 #include"scf_vm.h"
 
-#define NAJA_REG_FP   29
-#define NAJA_REG_LR   30
-#define NAJA_REG_SP   31
-
 static const char* somaps[][3] =
 {
        {"x64", "/lib/ld-linux-aarch64.so.1",       "/lib64/ld-linux-x86-64.so.2"},
        {"x64", "/lib/aarch64-linux-gnu/libc.so.6", "/lib/x86_64-linux-gnu/libc.so.6"},
 };
 
+// Signature through which a dynamically linked native function is called:
+// eight 64-bit integer arguments followed by eight double arguments.
-typedef int64_t (*naja_dyn_func_pt)(uint64_t r0,
+typedef int (*dyn_func_pt)(uint64_t r0,
                uint64_t r1,
                uint64_t r2,
                uint64_t r3,
                uint64_t r4,
                uint64_t r5,
                uint64_t r6,
-               uint64_t r7);
+               uint64_t r7,
+               double   d0,
+               double   d1,
+               double   d2,
+               double   d3,
+               double   d4,
+               double   d5,
+               double   d6,
+               double   d7);
 
 int naja_vm_open(scf_vm_t* vm)
 {
@@ -46,8 +50,8 @@ int naja_vm_close(scf_vm_t* vm)
 
 static int naja_vm_dynamic_link(scf_vm_t* vm)
 {
-       naja_dyn_func_pt  f    = NULL;
-       scf_vm_naja_t*    naja = vm->priv;
+       scf_vm_naja_t* naja = vm->priv;
+       dyn_func_pt    f    = NULL;
 
        int64_t  sp = naja->regs[NAJA_REG_SP];
 
@@ -69,7 +73,6 @@ static int naja_vm_dynamic_link(scf_vm_t* vm)
                if (r16  == vm->jmprel[i].r_offset) {
 
                        int   j     = ELF64_R_SYM(vm->jmprel[i].r_info);
-
                        char* fname = vm->dynstr + vm->dynsym[j].st_name;
 
                        scf_logw("j: %d, %s\n", j, fname);
@@ -99,7 +102,15 @@ static int naja_vm_dynamic_link(scf_vm_t* vm)
                                                naja->regs[4],
                                                naja->regs[5],
                                                naja->regs[6],
-                                               naja->regs[7]);
+                                               naja->regs[7],
+                                               naja->fvec[0].d[0],
+                                               naja->fvec[1].d[0],
+                                               naja->fvec[2].d[0],
+                                               naja->fvec[3].d[0],
+                                               naja->fvec[4].d[0],
+                                               naja->fvec[5].d[0],
+                                               naja->fvec[6].d[0],
+                                               naja->fvec[7].d[0]);
 
                                naja->regs[NAJA_REG_SP] += 16;
                                return 0;
@@ -290,7 +301,7 @@ static int __naja_add(scf_vm_t* vm, uint32_t inst)
 
                naja->regs[rd]  = naja->regs[rs0] + uimm15;
 
-               printf("add    r%d, r%d, %lu\n", rd, rs0, uimm15);
+               NAJA_PRINTF("add    r%d, r%d, %lu\n", rd, rs0, uimm15);
        } else {
                uint64_t sh     = (inst >> 18) & 0x3;
                uint64_t uimm8  = (inst >> 10) & 0xff;
@@ -299,17 +310,17 @@ static int __naja_add(scf_vm_t* vm, uint32_t inst)
                if (0 == sh) {
                        naja->regs[rd]  = naja->regs[rs0] + (naja->regs[rs1] << uimm8);
 
-                       printf("add    r%d, r%d, r%d << %lu\n", rd, rs0, rs1, uimm8);
+                       NAJA_PRINTF("add    r%d, r%d, r%d << %lu\n", rd, rs0, rs1, uimm8);
 
                } else if (1 == sh) {
                        naja->regs[rd]  = naja->regs[rs0] + (naja->regs[rs1] >> uimm8);
 
-                       printf("add    r%d, r%d, r%d LSR %lu\n", rd, rs0, rs1, uimm8);
+                       NAJA_PRINTF("add    r%d, r%d, r%d LSR %lu\n", rd, rs0, rs1, uimm8);
 
                } else {
                        naja->regs[rd]  = naja->regs[rs0] + (((int64_t)naja->regs[rs1]) >> uimm8);
 
-                       printf("add    r%d, r%d, r%d ASR %lu\n", rd, rs0, rs1, uimm8);
+                       NAJA_PRINTF("add    r%d, r%d, r%d ASR %lu\n", rd, rs0, rs1, uimm8);
                }
        }
 
@@ -317,6 +328,60 @@ static int __naja_add(scf_vm_t* vm, uint32_t inst)
        return 0;
 }
 
+// Executes "fadd": adds the double lanes d[0] of two fvec registers and stores the sum in a third.
+// Register fields of inst: bits 0-4 = first source, bits 5-9 = second source, bits 21-25 = destination.
+// Advances ip by 4 and always returns 0.
+static int __naja_fadd(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* cpu = vm->priv;
+
+       const int dst  = (inst >> 21) & 0x1f;
+       const int src1 = (inst >>  5) & 0x1f;
+       const int src0 =  inst        & 0x1f;
+
+       const double lhs = cpu->fvec[src0].d[0];
+       const double rhs = cpu->fvec[src1].d[0];
+
+       cpu->fvec[dst].d[0] = lhs + rhs;
+
+       NAJA_PRINTF("fadd   d%d, d%d, d%d\n", dst, src0, src1);
+
+       cpu->ip += 4;
+       return 0;
+}
+
+// Executes "fsub": subtracts the double lane d[0] of register rs1 from that of rs0 and stores the result in rd.
+// Register fields of inst: rs0 = bits 0-4, rs1 = bits 5-9, rd = bits 21-25.
+// Advances ip by 4 and always returns 0.
+static int __naja_fsub(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rs0 =  inst        & 0x1f;
+       int rs1 = (inst >>  5) & 0x1f;
+       int rd  = (inst >> 21) & 0x1f;
+
+       naja->fvec[rd].d[0]  = naja->fvec[rs0].d[0] - naja->fvec[rs1].d[0];
+
+       // The operands live in fvec[], so they are traced with the "d" prefix, not the integer "r" prefix.
+       NAJA_PRINTF("fsub   d%d, d%d, d%d\n", rd, rs0, rs1);
+
+       naja->ip += 4;
+       return 0;
+}
+
+// Executes "fcmp": compares the double lanes d[0] of two fvec registers and records the outcome in flags.
+// Register fields of inst: bits 0-4 = first operand, bits 5-9 = second operand.
+// flags becomes 0x4 when (first - second) is positive, 0x2 when it is negative, and 0x1 otherwise.
+// NOTE(review): a NaN operand makes the difference NaN, which fails both tests and is therefore
+// reported as 0x1 -- confirm that this is the intended result for unordered operands.
+// Advances ip by 4 and always returns 0.
+static int __naja_fcmp(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* cpu = vm->priv;
+
+       const int lhs = inst & 0x1f;
+       const int rhs = (inst >> 5) & 0x1f;
+
+       const double diff = cpu->fvec[lhs].d[0] - cpu->fvec[rhs].d[0];
+
+       NAJA_PRINTF("fcmp   d%d, d%d\n", lhs, rhs);
+
+       uint64_t cond = 0x1;
+
+       if (diff > 0.0) {
+               cond = 0x4;
+       } else if (diff < 0.0) {
+               cond = 0x2;
+       }
+
+       cpu->flags = cond;
+       cpu->ip   += 4;
+       return 0;
+}
+
 static int __naja_sub(scf_vm_t* vm, uint32_t inst)
 {
        scf_vm_naja_t* naja = vm->priv;
@@ -330,7 +395,7 @@ static int __naja_sub(scf_vm_t* vm, uint32_t inst)
 
                naja->regs[rd]  = naja->regs[rs0] - uimm15;
 
-               printf("sub    r%d, r%d, %lu\n", rd, rs0, uimm15);
+               NAJA_PRINTF("sub    r%d, r%d, %lu\n", rd, rs0, uimm15);
        } else {
                uint64_t sh     = (inst >> 18) & 0x3;
                uint64_t uimm8  = (inst >> 10) & 0xff;
@@ -339,17 +404,17 @@ static int __naja_sub(scf_vm_t* vm, uint32_t inst)
                if (0 == sh) {
                        naja->regs[rd]  = naja->regs[rs0] - (naja->regs[rs1] << uimm8);
 
-                       printf("sub    r%d, r%d, r%d << %lu\n", rd, rs0, rs1, uimm8);
+                       NAJA_PRINTF("sub    r%d, r%d, r%d << %lu\n", rd, rs0, rs1, uimm8);
 
                } else if (1 == sh) {
                        naja->regs[rd]  = naja->regs[rs0] - (naja->regs[rs1] >> uimm8);
 
-                       printf("sub    r%d, r%d, r%d LSR %lu\n", rd, rs0, rs1, uimm8);
+                       NAJA_PRINTF("sub    r%d, r%d, r%d LSR %lu\n", rd, rs0, rs1, uimm8);
 
                } else {
                        naja->regs[rd]  = naja->regs[rs0] - (((int64_t)naja->regs[rs1]) >> uimm8);
 
-                       printf("sub    r%d, r%d, r%d ASR %lu\n", rd, rs0, rs1, uimm8);
+                       NAJA_PRINTF("sub    r%d, r%d, r%d ASR %lu\n", rd, rs0, rs1, uimm8);
                }
        }
 
@@ -362,7 +427,6 @@ static int __naja_cmp(scf_vm_t* vm, uint32_t inst)
        scf_vm_naja_t* naja = vm->priv;
 
        int rs0 =  inst        & 0x1f;
-       int rd  = (inst >> 21) & 0x1f;
        int I   = (inst >> 20) & 0x1;
 
        int ret = 0;
@@ -372,7 +436,7 @@ static int __naja_cmp(scf_vm_t* vm, uint32_t inst)
 
                ret = naja->regs[rs0] - uimm15;
 
-               printf("cmp    r%d, %ld,  rs0: %lx, ret: %d\n", rs0, uimm15, naja->regs[rs0], ret);
+               NAJA_PRINTF("cmp    r%d, %ld,  rs0: %lx, ret: %d\n", rs0, uimm15, naja->regs[rs0], ret);
 
        } else {
                uint64_t sh     = (inst >> 18) & 0x3;
@@ -382,17 +446,17 @@ static int __naja_cmp(scf_vm_t* vm, uint32_t inst)
                if (0 == sh) {
                        ret = naja->regs[rs0] - (naja->regs[rs1] << uimm8);
 
-                       printf("cmp    r%d, r%d LSL %ld,  rs0: %#lx, rs1: %#lx, ret: %d\n", rs0, rs1, uimm8, naja->regs[rs0], naja->regs[rs1], ret);
+                       NAJA_PRINTF("cmp    r%d, r%d LSL %ld,  rs0: %#lx, rs1: %#lx, ret: %d\n", rs0, rs1, uimm8, naja->regs[rs0], naja->regs[rs1], ret);
 
                } else if (1 == sh) {
                        ret = naja->regs[rs0] - (naja->regs[rs1] >> uimm8);
 
-                       printf("cmp    r%d, r%d LSR %ld,  rs0: %#lx, rs1: %#lx, ret: %d\n", rs0, rs1, uimm8, naja->regs[rs0], naja->regs[rs1], ret);
+                       NAJA_PRINTF("cmp    r%d, r%d LSR %ld,  rs0: %#lx, rs1: %#lx, ret: %d\n", rs0, rs1, uimm8, naja->regs[rs0], naja->regs[rs1], ret);
 
                } else {
                        ret = naja->regs[rs0] - (((int64_t)naja->regs[rs1]) >> uimm8);
 
-                       printf("cmp    r%d, r%d ASR %ld,  rs0: %#lx, rs1: %ld, ret: %d\n", rs0, rs1, uimm8, naja->regs[rs0], naja->regs[rs1], ret);
+                       NAJA_PRINTF("cmp    r%d, r%d ASR %ld,  rs0: %#lx, rs1: %ld, ret: %d\n", rs0, rs1, uimm8, naja->regs[rs0], naja->regs[rs1], ret);
                }
        }
 
@@ -418,17 +482,82 @@ static int __naja_mul(scf_vm_t* vm, uint32_t inst)
        int S   = (inst >> 20) & 0x1;
        int opt = (inst >> 15) & 0x3;
 
-       scf_logw("\n");
        if (S)
                naja->regs[rd] = (int64_t)naja->regs[rs0] * (int64_t)naja->regs[rs1];
        else
                naja->regs[rd] = naja->regs[rs0] * naja->regs[rs1];
 
-       if (0 == opt)
+       if (0 == opt) {
                naja->regs[rd] += naja->regs[rs2];
-       else if (1 == opt)
+
+               NAJA_PRINTF("madd   r%d, r%d, r%d, r%d", rd, rs2, rs0, rs1);
+       } else if (1 == opt) {
                naja->regs[rd]  = naja->regs[rs2] - naja->regs[rd];
 
+               NAJA_PRINTF("msub   r%d, r%d, r%d, r%d", rd, rs2, rs0, rs1);
+       } else {
+               if (S)
+                       NAJA_PRINTF("smul   r%d, r%d, r%d", rd, rs0, rs1);
+               else
+                       NAJA_PRINTF("mul    r%d, r%d, r%d", rd, rs0, rs1);
+       }
+
+       naja->ip += 4;
+       return 0;
+}
+
+// Executes the float multiply family on the double lanes d[0] of the fvec registers.
+// Register fields of inst: rs0 = bits 0-4, rs1 = bits 5-9, rs2 = bits 10-14, rd = bits 21-25.
+// opt (bits 18-19) selects the operation:
+//   0     -> rd = rs2 + rs0 * rs1   (traced as "fmadd")
+//   1     -> rd = rs2 - rs0 * rs1   (traced as "fmsub")
+//   other -> rd = rs0 * rs1         (traced as "fmul")
+// Advances ip by 4 and always returns 0.
+static int __naja_fmul(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rs0 =  inst        & 0x1f;
+       int rs1 = (inst >>  5) & 0x1f;
+       int rs2 = (inst >> 10) & 0x1f;
+       int rd  = (inst >> 21) & 0x1f;
+       int opt = (inst >> 18) & 0x3;
+
+       // Read every source before rd is written: rd may name the same register as rs2
+       // (an accumulator), and writing the product first would destroy the addend.
+       double product = naja->fvec[rs0].d[0] * naja->fvec[rs1].d[0];
+       double addend  = naja->fvec[rs2].d[0];
+
+       if (0 == opt) {
+               naja->fvec[rd].d[0] = addend + product;
+
+               NAJA_PRINTF("fmadd   d%d, d%d, d%d, d%d\n", rd, rs2, rs0, rs1);
+
+       } else if (1 == opt) {
+               naja->fvec[rd].d[0] = addend - product;
+
+               NAJA_PRINTF("fmsub   d%d, d%d, d%d, d%d\n", rd, rs2, rs0, rs1);
+       } else {
+               naja->fvec[rd].d[0] = product;
+
+               NAJA_PRINTF("fmul    d%d, d%d, d%d\n", rd, rs0, rs1);
+       }
+
+       naja->ip += 4;
+       return 0;
+}
+
+// Executes the float divide family on the double lanes d[0] of the fvec registers.
+// Register fields of inst: rs0 = bits 0-4, rs1 = bits 5-9, rs2 = bits 10-14, rd = bits 21-25.
+// opt (bits 18-19) selects the operation:
+//   0     -> rd = rs2 + rs0 / rs1   (traced as "fdadd")
+//   1     -> rd = rs2 - rs0 / rs1   (traced as "fdsub")
+//   other -> rd = rs0 / rs1         (traced as "fdiv")
+// Advances ip by 4 and always returns 0.
+static int __naja_fdiv(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rs0 =  inst        & 0x1f;
+       int rs1 = (inst >>  5) & 0x1f;
+       int rs2 = (inst >> 10) & 0x1f;
+       int rd  = (inst >> 21) & 0x1f;
+       int opt = (inst >> 18) & 0x3;
+
+       // Read every source before rd is written: rd may name the same register as rs2
+       // (an accumulator), and writing the quotient first would destroy the addend.
+       double quotient = naja->fvec[rs0].d[0] / naja->fvec[rs1].d[0];
+       double addend   = naja->fvec[rs2].d[0];
+
+       if (0 == opt) {
+               naja->fvec[rd].d[0] = addend + quotient;
+
+               NAJA_PRINTF("fdadd   d%d, d%d, d%d, d%d\n", rd, rs2, rs0, rs1);
+
+       } else if (1 == opt) {
+               naja->fvec[rd].d[0] = addend - quotient;
+
+               NAJA_PRINTF("fdsub   d%d, d%d, d%d, d%d\n", rd, rs2, rs0, rs1);
+       } else {
+               naja->fvec[rd].d[0] = quotient;
+
+               NAJA_PRINTF("fdiv    d%d, d%d, d%d\n", rd, rs0, rs1);
+       }
+
        naja->ip += 4;
        return 0;
 }
@@ -444,39 +573,36 @@ static int __naja_div(scf_vm_t* vm, uint32_t inst)
        int S   = (inst >> 20) & 0x1;
        int opt = (inst >> 15) & 0x3;
 
-       scf_logw("\n");
        if (S)
                naja->regs[rd] = (int64_t)naja->regs[rs0] / (int64_t)naja->regs[rs1];
        else
                naja->regs[rd] = naja->regs[rs0] / naja->regs[rs1];
 
-       if (0 == opt)
+       if (0 == opt) {
                naja->regs[rd] += naja->regs[rs2];
-       else if (1 == opt)
+
+               NAJA_PRINTF("fdadd   d%d, d%d, d%d, d%d", rd, rs2, rs0, rs1);
+
+       } else if (1 == opt) {
                naja->regs[rd]  = naja->regs[rs2] - naja->regs[rd];
 
+               NAJA_PRINTF("dsub   r%d, r%d, r%d, r%d", rd, rs2, rs0, rs1);
+       } else {
+               if (S)
+                       NAJA_PRINTF("sdiv   r%d, r%d, r%d", rd, rs0, rs1);
+               else
+                       NAJA_PRINTF("div    r%d, r%d, r%d", rd, rs0, rs1);
+       }
+
        naja->ip += 4;
        return 0;
 }
 
-static int __naja_ldr_disp(scf_vm_t* vm, uint32_t inst)
+static int __naja_mem(scf_vm_t* vm, int64_t addr, uint8_t** pdata, int64_t* poffset)
 {
-       scf_vm_naja_t* naja = vm->priv;
-
-       int rb  =  inst        & 0x1f;
-       int rd  = (inst >> 21) & 0x1f;
-       int A   = (inst >> 20) & 0x1;
-       int ext = (inst >> 17) & 0x7;
-       int s12 = (inst >>  5) & 0xfff;
-
-       if (s12  & 0x800)
-               s12 |= 0xfffff000;
-
-       scf_logd("rd: %d, rb: %d, s12: %d, ext: %d\n", rd, rb, s12, ext);
-
-       int64_t  addr   = naja->regs[rb];
-       int64_t  offset = 0;
-       uint8_t* data;
+       scf_vm_naja_t* naja   = vm->priv;
+       uint8_t*       data   = NULL;
+       int64_t        offset = 0;
 
        if (addr >= (int64_t)vm->data->data) {
                data  = (uint8_t*)addr;
@@ -534,6 +660,34 @@ static int __naja_ldr_disp(scf_vm_t* vm, uint32_t inst)
                offset = addr;
        }
 
+       *poffset = offset;
+       *pdata   = data;
+       return 0;
+}
+
+static int __naja_fstr_disp(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rb  =  inst        & 0x1f;
+       int rd  = (inst >> 21) & 0x1f;
+       int A   = (inst >> 20) & 0x1;
+       int ext = (inst >> 17) & 0x7;
+       int s12 = (inst >>  5) & 0xfff;
+
+       if (s12  & 0x800)
+               s12 |= 0xfffff000;
+
+       scf_logd("rd: %d, rb: %d, s12: %d, ext: %d\n", rd, rb, s12, ext);
+
+       int64_t  addr   = naja->regs[rb];
+       int64_t  offset = 0;
+       uint8_t* data   = NULL;
+
+       int ret = __naja_mem(vm, addr, &data, &offset);
+       if (ret < 0)
+               return ret;
+
        if (!A)
                offset += s12 << (ext & 0x3);
 
@@ -552,70 +706,93 @@ static int __naja_ldr_disp(scf_vm_t* vm, uint32_t inst)
        }
 
        switch (ext) {
-               case 0:
-                       naja->regs[rd] = *(uint8_t*)(data + offset);
-                       if (A) {
-                               naja->regs[rb] += s12;
-                               printf("ldrb   r%d, [r%d, %d]!\n", rd, rb, s12);
-                       } else
-                               printf("ldrb   r%d, [r%d, %d]\n", rd, rb, s12);
-                       break;
-
-               case 1:
-                       naja->regs[rd] = *(uint16_t*)(data + offset);
+               case 3:
+                       *(double*)(data + offset) = naja->fvec[rd].d[0];
                        if (A) {
-                               naja->regs[rb] += s12 << 1;
-                               printf("ldrw   r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
+                               naja->regs[rb] += s12 << 3;
+                               NAJA_PRINTF("fstr    d%d, [r%d, %d]!, rd: %lg, rb: %ld, %p\n", rd, rb, s12 << 3, naja->fvec[rd].d[0], naja->regs[rb], data + offset);
                        } else
-                               printf("ldrw   r%d, [r%d, %d]\n", rd, rb, s12 << 1);
+                               NAJA_PRINTF("fstr    d%d, [r%d, %d]\n", rd, rb, s12 << 3);
                        break;
 
-               case 2:
-                       naja->regs[rd] = *(uint32_t*)(data + offset);
+               case 6:
+                       *(float*)(data + offset) = naja->fvec[rd].d[0];
                        if (A) {
                                naja->regs[rb] += s12 << 2;
-                               printf("ldrl   r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
+                               NAJA_PRINTF("fstrf  f%d, [r%d, %d]!\n", rd, rb, s12 << 2);
                        } else
-                               printf("ldrl   r%d, [r%d, %d],  %ld, %p\n", rd, rb, s12 << 2, naja->regs[rd], data + offset);
+                               NAJA_PRINTF("fstrf  f%d, [r%d, %d]\n", rd, rb, s12 << 2);
                        break;
-
-               case 3:
-                       naja->regs[rd] = *(uint64_t*)(data + offset);
-                       if (A) {
-                               naja->regs[rb] += s12 << 3;
-                               printf("ldr    r%d, [r%d, %d]!, rd: %#lx, rb: %ld, %p\n", rd, rb, s12 << 3, naja->regs[rd], naja->regs[rb], data + offset);
-                       } else
-                               printf("ldr    r%d, [r%d, %d]\n", rd, rb, s12 << 3);
+               default:
+                       scf_loge("ext: %d\n", ext);
+                       return -1;
                        break;
+       };
 
-               case 4:
-                       naja->regs[rd] = *(int8_t*)(data + offset);
-                       if (A) {
-                               naja->regs[rb] += s12;
-                               printf("ldrsb  r%d, [r%d, %d]!\n", rd, rb, s12);
-                       } else
-                               printf("ldrsb  r%d, [r%d, %d]\n", rd, rb, s12);
-                       break;
+       naja->ip += 4;
+       return 0;
+}
 
-               case 5:
-                       naja->regs[rd] = *(int16_t*)(data + offset);
+static int __naja_fldr_disp(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rb  =  inst        & 0x1f;
+       int rd  = (inst >> 21) & 0x1f;
+       int A   = (inst >> 20) & 0x1;
+       int ext = (inst >> 17) & 0x7;
+       int s12 = (inst >>  5) & 0xfff;
+
+       if (s12  & 0x800)
+               s12 |= 0xfffff000;
+
+       scf_logd("rd: %d, rb: %d, s12: %d, ext: %d\n", rd, rb, s12, ext);
+
+       int64_t  addr   = naja->regs[rb];
+       int64_t  offset = 0;
+       uint8_t* data   = NULL;
+
+       int ret = __naja_mem(vm, addr, &data, &offset);
+       if (ret < 0)
+               return ret;
+
+       if (!A)
+               offset += s12 << (ext & 0x3);
+
+       if (data   == naja->stack) {
+               offset = -offset;
+
+               scf_logd("offset0: %ld, size: %ld\n", offset, naja->size);
+               assert(offset >= 0);
+
+               if (naja->size < offset) {
+                       scf_loge("offset: %ld, size: %ld\n", offset, naja->size);
+                       return -EINVAL;
+               }
+
+               offset -= 1 << (ext & 0x3);
+       }
+
+       switch (ext) {
+               case 3:
+                       naja->fvec[rd].d[0] = *(double*)(data + offset);
                        if (A) {
-                               naja->regs[rb] += s12 << 1;
-                               printf("ldrsw  r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
+                               naja->regs[rb] += s12 << 3;
+                               NAJA_PRINTF("fldr    d%d, [r%d, %d]!, rd: %lg, rb: %ld, %p\n", rd, rb, s12 << 3, naja->fvec[rd].d[0], naja->regs[rb], data + offset);
                        } else
-                               printf("ldrsw  r%d, [r%d, %d]\n", rd, rb, s12 << 1);
+                               NAJA_PRINTF("fldr    d%d, [r%d, %d], rd: %lg, rb: %ld, %p\n", rd, rb, s12 << 3, naja->fvec[rd].d[0], naja->regs[rb], data + offset);
                        break;
 
                case 6:
-                       naja->regs[rd] = *(int32_t*)(data + offset);
+                       naja->fvec[rd].d[0] = *(float*)(data + offset);
                        if (A) {
                                naja->regs[rb] += s12 << 2;
-                               printf("ldrsl  r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
+                               NAJA_PRINTF("fldrf   f%d, [r%d, %d]!, rd: %lg, rb: %ld, %p\n", rd, rb, s12 << 2, naja->fvec[rd].d[0], naja->regs[rb], data + offset);
                        } else
-                               printf("ldrsl  r%d, [r%d, %d]\n", rd, rb, s12 << 2);
+                               NAJA_PRINTF("fldrf   f%d, [r%d, %d], rd: %lg, rb: %ld, %p\n", rd, rb, s12 << 2, naja->fvec[rd].d[0], naja->regs[rb], data + offset);
                        break;
                default:
-                       scf_loge("\n");
+                       scf_loge("ext: %d\n", ext);
                        return -1;
                        break;
        };
@@ -624,80 +801,136 @@ static int __naja_ldr_disp(scf_vm_t* vm, uint32_t inst)
        return 0;
 }
 
-static int __naja_ldr_sib(scf_vm_t* vm, uint32_t inst)
+// Executes an integer load with a signed 12-bit displacement ("ldr*  rd, [rb, s12]").
+// Fields decoded from inst: rb = bits 0-4, s12 = bits 5-16 (sign-extended), ext = bits 17-19,
+// A = bit 20, rd = bits 21-25.
+// ext selects the access: 0-3 load 1/2/4/8 bytes zero-extended, 4-6 load 1/2/4 bytes sign-extended;
+// any other value is rejected with -1.
+// When A is clear, the displacement scaled by the access size is added to the offset before the load;
+// when A is set, it is not added, and regs[rb] is advanced by the scaled displacement after the load.
+// Returns 0 on success (after advancing ip by 4) or a negative value on error.
+static int __naja_ldr_disp(scf_vm_t* vm, uint32_t inst)
 {
        scf_vm_naja_t* naja = vm->priv;
 
        int rb  =  inst        & 0x1f;
-       int ri  = (inst >>  5) & 0x1f;
        int rd  = (inst >> 21) & 0x1f;
+       int A   = (inst >> 20) & 0x1;
        int ext = (inst >> 17) & 0x7;
-       int u7  = (inst >> 10) & 0x7f;
+       int s12 = (inst >>  5) & 0xfff;
+
+       if (s12  & 0x800)
+               s12 |= 0xfffff000;
+
+       scf_logd("rd: %d, rb: %d, s12: %d, ext: %d\n", rd, rb, s12, ext);
 
        int64_t  addr   = naja->regs[rb];
-       uint64_t offset;
-       uint8_t* data;
+       int64_t  offset = 0;
+       uint8_t* data   = NULL;
 
-       scf_logd("ldr   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
-       scf_logd("rd: %#lx\n", naja->regs[rd]);
-       scf_logd("rb: %#lx\n", naja->regs[rb]);
-       scf_logd("ri: %#lx\n", naja->regs[ri]);
+       int ret = __naja_mem(vm, addr, &data, &offset);
+       if (ret < 0)
+               return ret;
 
-       if (addr >= (int64_t)vm->data->data) {
-               data  = (uint8_t*)addr;
+       if (!A)
+               offset += s12 << (ext & 0x3);
 
-               if (addr >= (int64_t)vm->data->data + vm->data->len) {
-                       scf_loge("addr: %#lx, %#lx\n", addr, (int64_t)vm->data->data + vm->data->len);
-                       return -1;
-               }
+       if (data   == naja->stack) {
+               offset = -offset;
 
-       } else if (addr >= (int64_t)vm->rodata->data) {
-               data  = (uint8_t*)addr;
+               scf_logd("offset0: %ld, size: %ld\n", offset, naja->size);
+               assert(offset >= 0);
 
-               if (addr >= (int64_t)vm->rodata->data + vm->rodata->len) {
-                       scf_loge("\n");
-                       return -1;
+               if (naja->size < offset) {
+                       scf_loge("offset: %ld, size: %ld\n", offset, naja->size);
+                       return -EINVAL;
                }
 
-       } else if (addr >= (int64_t)vm->text->data) {
-               data  = (uint8_t*)addr;
+               offset -= 1 << (ext & 0x3);
+       }
 
-               if (addr >= (int64_t)vm->text->data + vm->text->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
+       switch (ext) {
+               case 0:
+                       naja->regs[rd] = *(uint8_t*)(data + offset);
+                       if (A) {
+                               naja->regs[rb] += s12;
+                               NAJA_PRINTF("ldrb   r%d, [r%d, %d]!\n", rd, rb, s12);
+                       } else
+                               NAJA_PRINTF("ldrb   r%d, [r%d, %d]\n", rd, rb, s12);
+                       break;
 
-       } else if (addr  >= 0x800000) {
-               data   = vm->data->data;
-               offset = addr - vm->data->addr;
+               case 1:
+                       naja->regs[rd] = *(uint16_t*)(data + offset);
+                       if (A) {
+                               naja->regs[rb] += s12 << 1;
+                               NAJA_PRINTF("ldrw   r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
+                       } else
+                               NAJA_PRINTF("ldrw   r%d, [r%d, %d]\n", rd, rb, s12 << 1);
+                       break;
 
-               if (offset >= vm->data->len) {
+               case 2:
+                       naja->regs[rd] = *(uint32_t*)(data + offset);
+                       if (A) {
+                               naja->regs[rb] += s12 << 2;
+                               NAJA_PRINTF("ldrl   r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
+                       } else
+                               NAJA_PRINTF("ldrl   r%d, [r%d, %d],  %ld, %p\n", rd, rb, s12 << 2, naja->regs[rd], data + offset);
+                       break;
+
+               case 3:
+                       naja->regs[rd] = *(uint64_t*)(data + offset);
+                       if (A) {
+                               naja->regs[rb] += s12 << 3;
+                               NAJA_PRINTF("ldr    r%d, [r%d, %d]!, rd: %#lx, rb: %ld, %p\n", rd, rb, s12 << 3, naja->regs[rd], naja->regs[rb], data + offset);
+                       } else
+                               NAJA_PRINTF("ldr    r%d, [r%d, %d]\n", rd, rb, s12 << 3);
+                       break;
+
+               case 4:
+                       naja->regs[rd] = *(int8_t*)(data + offset);
+                       if (A) {
+                               naja->regs[rb] += s12;
+                               NAJA_PRINTF("ldrsb  r%d, [r%d, %d]!\n", rd, rb, s12);
+                       } else
+                               NAJA_PRINTF("ldrsb  r%d, [r%d, %d]\n", rd, rb, s12);
+                       break;
+
+               case 5:
+                       naja->regs[rd] = *(int16_t*)(data + offset);
+                       if (A) {
+                               naja->regs[rb] += s12 << 1;
+                               NAJA_PRINTF("ldrsw  r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
+                       } else
+                               NAJA_PRINTF("ldrsw  r%d, [r%d, %d]\n", rd, rb, s12 << 1);
+                       break;
+
+               case 6:
+                       naja->regs[rd] = *(int32_t*)(data + offset);
+                       if (A) {
+                               naja->regs[rb] += s12 << 2;
+                               NAJA_PRINTF("ldrsl  r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
+                       } else
+                               NAJA_PRINTF("ldrsl  r%d, [r%d, %d]\n", rd, rb, s12 << 2);
+                       break;
+               default:
                        scf_loge("\n");
                        return -1;
-               }
+                       break;
+       };
 
-       } else if (addr >= 0x600000) {
-               data   = vm->rodata->data;
-               offset = addr - vm->rodata->addr;
+       naja->ip += 4;
+       return 0;
+}
 
-               if (offset >= vm->rodata->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
+static int __naja_ldr_sib(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* naja = vm->priv;
 
-       } else if (addr >= 0x400000) {
-               data   = vm->text->data;
-               offset = addr - vm->text->addr;
+       int rb  =  inst        & 0x1f;
+       int ri  = (inst >>  5) & 0x1f;
+       int rd  = (inst >> 21) & 0x1f;
+       int ext = (inst >> 17) & 0x7;
+       int u7  = (inst >> 10) & 0x7f;
 
-               if (offset >= vm->text->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
+       int64_t  addr   = naja->regs[rb];
+       int64_t  offset = 0;
+       uint8_t* data   = NULL;
 
-       } else {
-               data   = naja->stack;
-               offset = addr;
-       }
+       int ret = __naja_mem(vm, addr, &data, &offset);
+       if (ret < 0)
+               return ret;
 
        offset += (naja->regs[ri] << u7);
 
@@ -716,43 +949,43 @@ static int __naja_ldr_sib(scf_vm_t* vm, uint32_t inst)
 
        switch (ext) {
                case 0:
-                       printf("ldrb  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("ldrb  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        naja->regs[rd] = *(uint8_t*)(data + offset);
                        break;
 
                case 1:
-                       printf("ldrw  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("ldrw  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        naja->regs[rd] = *(uint16_t*)(data + offset);
                        break;
 
                case 2:
-                       printf("ldrl  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("ldrl  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        naja->regs[rd] = *(uint32_t*)(data + offset);
                        break;
 
                case 3:
-                       printf("ldr   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("ldr   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        naja->regs[rd] = *(uint64_t*)(data + offset);
                        break;
 
                case 4:
-                       printf("ldrsb r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("ldrsb r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        naja->regs[rd] = *(int8_t*)(data + offset);
                        break;
 
                case 5:
-                       printf("ldrsw r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("ldrsw r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        naja->regs[rd] = *(int16_t*)(data + offset);
                        break;
 
                case 6:
-                       printf("ldrsl r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("ldrsl r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        naja->regs[rd] = *(int32_t*)(data + offset);
                        break;
@@ -766,80 +999,136 @@ static int __naja_ldr_sib(scf_vm_t* vm, uint32_t inst)
        return 0;
 }
 
-static int __naja_str_disp(scf_vm_t* vm, uint32_t inst)
+static int __naja_fldr_sib(scf_vm_t* vm, uint32_t inst)  // opcode 28 handler: load a floating-point value from [regs[rb] + (regs[ri] << u7)] into fvec[rd].d[0]; returns 0 on success, negative on failure
 {
        scf_vm_naja_t* naja = vm->priv;
 
        int rb  =  inst        & 0x1f;
+       int ri  = (inst >>  5) & 0x1f;  // index register number, instruction bits 5..9
        int rd  = (inst >> 21) & 0x1f;
-       int A   = (inst >> 20) & 0x1;
-       int ext = (inst >> 17) & 0x3;
-       int s12 = (inst >>  5) & 0xfff;
-
-       if (s12  & 0x800)
-               s12 |= 0xfffff000;
+       int ext = (inst >> 17) & 0x7;  // bits 17..19: 3 selects a 64-bit double, 6 a 32-bit float (see the switch below)
+       int u7  = (inst >> 10) & 0x7f;  // left-shift count applied to regs[ri], bits 10..16
 
        int64_t  addr   = naja->regs[rb];
        int64_t  offset = 0;
-       uint8_t* data;
+       uint8_t* data   = NULL;
 
-       if (addr >= (int64_t)vm->data->data) {
-               data  = (uint8_t*)addr;
+       int ret = __naja_mem(vm, addr, &data, &offset);  // helper defined elsewhere: fills in data/offset for addr, negative on failure
+       if (ret < 0)
+               return ret;
 
-               if (addr >= (int64_t)vm->data->data + vm->data->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
+       offset += (naja->regs[ri] << u7);  // add the scaled index to the base offset
 
-       } else if (addr >= (int64_t)vm->rodata->data) {
-               data  = (uint8_t*)addr;
+       if (data   == naja->stack) {  // stack access: the offset is negated, then range-checked against naja->size
+               offset = -offset;
 
-               if (addr >= (int64_t)vm->rodata->data + vm->rodata->len) {
+               assert(offset >= 0);
+
+               if (naja->size < offset) {
                        scf_loge("\n");
                        return -1;
                }
 
-       } else if (addr >= (int64_t)vm->text->data) {
-               data  = (uint8_t*)addr;
+               offset -= 1 << (ext & 0x3);  // step back by the access size: 8 bytes for ext 3, 4 bytes for ext 6
+       }
 
-               if (addr >= (int64_t)vm->text->data + vm->text->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
+       switch (ext) {
+               case 3:
+                       NAJA_PRINTF("fldr  d%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
-       } else if (addr  >= 0x800000) {
-               data   = vm->data->data;
-               offset = addr - vm->data->addr;
+                       naja->fvec[rd].d[0] = *(double*)(data + offset);
+                       break;
 
-               scf_loge("rb: %d, offset: %ld, addr: %#lx, %#lx\n", rb, offset, addr, vm->data->addr);
+               case 6:
+                       NAJA_PRINTF("fldrf f%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
-               if (offset >= vm->data->len) {
+                       naja->fvec[rd].d[0] = *(float*)(data + offset);  // the loaded float is stored into d[0], i.e. kept as a double
+                       break;
+               default:
                        scf_loge("\n");
                        return -1;
-               }
+                       break;
+       };
 
-       } else if (addr >= 0x600000) {
-               data   = vm->rodata->data;
-               offset = addr - vm->rodata->addr;
+       naja->ip += 4;  // advance past this 4-byte instruction
+       return 0;
+}
 
-               if (offset >= vm->rodata->len) {
+static int __naja_fstr_sib(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rb  =  inst        & 0x1f;
+       int ri  = (inst >>  5) & 0x1f;
+       int rd  = (inst >> 21) & 0x1f;
+       int ext = (inst >> 17) & 0x3;
+       int u7  = (inst >> 10) & 0x7f;
+
+       int64_t  addr   = naja->regs[rb];
+       int64_t  offset = 0;
+       uint8_t* data   = NULL;
+
+       int ret = __naja_mem(vm, addr, &data, &offset);
+       if (ret < 0)
+               return ret;
+
+       offset += naja->regs[ri] << u7;
+
+       if (data   == naja->stack) {
+               offset = -offset;
+
+               assert(offset >= 0);
+
+               if (naja->size < offset) {
                        scf_loge("\n");
                        return -1;
                }
 
-       } else if (addr >= 0x400000) {
-               data   = vm->text->data;
-               offset = addr - vm->text->addr;
+               offset -= 1 << ext;
+       }
 
-               if (offset >= vm->text->len) {
+       switch (ext) {
+               case 3:
+                       NAJA_PRINTF("fstr  d%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+
+                       *(double*)(data + offset) = naja->fvec[rd].d[0];
+                       break;
+
+               case 6:
+                       NAJA_PRINTF("fldrf f%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+
+                       *(float*)(data + offset) = naja->fvec[rd].d[0];
+                       break;
+               default:
                        scf_loge("\n");
                        return -1;
-               }
+                       break;
+       };
 
-       } else {
-               data   = naja->stack;
-               offset = addr;
-       }
+       naja->ip += 4;
+       return 0;
+}
+
+static int __naja_str_disp(scf_vm_t* vm, uint32_t inst)
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rb  =  inst        & 0x1f;
+       int rd  = (inst >> 21) & 0x1f;
+       int A   = (inst >> 20) & 0x1;
+       int ext = (inst >> 17) & 0x3;
+       int s12 = (inst >>  5) & 0xfff;
+
+       if (s12  & 0x800)
+               s12 |= 0xfffff000;
+
+       int64_t  addr   = naja->regs[rb];
+       int64_t  offset = 0;
+       uint8_t* data   = NULL;
+
+       int ret = __naja_mem(vm, addr, &data, &offset);
+       if (ret < 0)
+               return ret;
 
        offset += s12 << ext;
 
@@ -867,36 +1156,36 @@ static int __naja_str_disp(scf_vm_t* vm, uint32_t inst)
                        *(uint8_t*)(data + offset) = naja->regs[rd];
                        if (A) {
                                naja->regs[rb] += s12;
-                               printf("strb   r%d, [r%d, %d]!\n", rd, rb, s12);
+                               NAJA_PRINTF("strb   r%d, [r%d, %d]!\n", rd, rb, s12);
                        } else
-                               printf("strb   r%d, [r%d, %d]\n", rd, rb, s12);
+                               NAJA_PRINTF("strb   r%d, [r%d, %d]\n", rd, rb, s12);
                        break;
 
                case 1:
                        *(uint16_t*)(data + offset) = naja->regs[rd];
                        if (A) {
                                naja->regs[rb] += s12 << 1;
-                               printf("strw   r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
+                               NAJA_PRINTF("strw   r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
                        } else
-                               printf("strw   r%d, [r%d, %d]\n", rd, rb, s12 << 1);
+                               NAJA_PRINTF("strw   r%d, [r%d, %d]\n", rd, rb, s12 << 1);
                        break;
 
                case 2:
                        *(uint32_t*)(data + offset) = naja->regs[rd];
                        if (A) {
                                naja->regs[rb] += s12 << 2;
-                               printf("strl   r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
+                               NAJA_PRINTF("strl   r%d, [r%d, %d]!, s12: %d\n", rd, rb, s12 << 2, s12);
                        } else
-                               printf("strl   r%d, [r%d, %d],  %d, %p\n", rd, rb, s12 << 2, *(uint32_t*)(data + offset), data + offset);
+                               NAJA_PRINTF("strl   r%d, [r%d, %d],  s12: %d, %d, %p\n", rd, rb, s12 << 2, s12, *(uint32_t*)(data + offset), data + offset);
                        break;
 
                case 3:
                        *(uint64_t*)(data + offset) = naja->regs[rd];
                        if (A) {
                                naja->regs[rb] += s12 << 3;
-                               printf("str    r%d, [r%d, %d]!, rd: %#lx, rb: %ld, %p\n", rd, rb, s12 << 3, naja->regs[rd], naja->regs[rb], data + offset);
+                               NAJA_PRINTF("str    r%d, [r%d, %d]!, rd: %#lx, rb: %ld, %p\n", rd, rb, s12 << 3, naja->regs[rd], naja->regs[rb], data + offset);
                        } else
-                               printf("str    r%d, [r%d, %d]\n", rd, rb, s12 << 3);
+                               NAJA_PRINTF("str    r%d, [r%d, %d]\n", rd, rb, s12 << 3);
                        break;
 
                default:
@@ -919,65 +1208,13 @@ static int __naja_str_sib(scf_vm_t* vm, uint32_t inst)
        int ext = (inst >> 17) & 0x3;
        int u7  = (inst >> 10) & 0x7f;
 
-       uint64_t addr   = naja->regs[rb];
-       uint64_t offset = 0;
-       uint8_t* data;
-
-       if (addr >= (int64_t)vm->data->data) {
-               data  = (uint8_t*)addr;
-
-               if (addr >= (int64_t)vm->data->data + vm->data->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
-
-       } else if (addr >= (int64_t)vm->rodata->data) {
-               data  = (uint8_t*)addr;
-
-               if (addr >= (int64_t)vm->rodata->data + vm->rodata->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
-
-       } else if (addr >= (int64_t)vm->text->data) {
-               data  = (uint8_t*)addr;
-
-               if (addr >= (int64_t)vm->text->data + vm->text->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
-
-       } else if (addr  >= 0x800000) {
-               data   = vm->data->data;
-               offset = addr - vm->data->addr;
-
-               if (offset >= vm->data->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
-
-       } else if (addr >= 0x600000) {
-               data   = vm->rodata->data;
-               offset = addr - vm->rodata->addr;
-
-               if (offset >= vm->rodata->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
-
-       } else if (addr >= 0x400000) {
-               data   = vm->text->data;
-               offset = addr - vm->text->addr;
-
-               if (offset >= vm->text->len) {
-                       scf_loge("\n");
-                       return -1;
-               }
+       int64_t  addr   = naja->regs[rb];
+       int64_t  offset = 0;
+       uint8_t* data   = NULL;
 
-       } else {
-               data   = naja->stack;
-               offset = addr;
-       }
+       int ret = __naja_mem(vm, addr, &data, &offset);
+       if (ret < 0)
+               return ret;
 
        offset += naja->regs[ri] << u7;
 
@@ -996,25 +1233,25 @@ static int __naja_str_sib(scf_vm_t* vm, uint32_t inst)
 
        switch (ext) {
                case 0:
-                       printf("strb  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("strb  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        *(uint8_t*)(data + offset) = naja->regs[rd];
                        break;
 
                case 1:
-                       printf("strw  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("strw  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        *(uint16_t*)(data + offset) = naja->regs[rd];
                        break;
 
                case 2:
-                       printf("strl  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("strl  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        *(uint32_t*)(data + offset) = naja->regs[rd];
                        break;
 
                case 3:
-                       printf("str   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       NAJA_PRINTF("str   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
 
                        *(uint64_t*)(data + offset) = naja->regs[rd];
                        break;
@@ -1131,7 +1368,7 @@ static int __naja_jmp_disp(scf_vm_t* vm, uint32_t inst)
                simm26 |= 0xfc000000;
 
        naja->ip += simm26 << 2;
-       printf("jmp    %#lx\n", naja->ip);
+       NAJA_PRINTF("jmp    %#lx\n", naja->ip);
        return 0;
 }
 
@@ -1148,7 +1385,7 @@ static int __naja_call_disp(scf_vm_t* vm, uint32_t inst)
 
        naja->ip += simm26 << 2;
 
-       printf("call   %#lx\n", naja->ip);
+       NAJA_PRINTF("call   %#lx\n", naja->ip);
        return 0;
 }
 
@@ -1172,22 +1409,22 @@ static int __naja_jmp_reg(scf_vm_t* vm, uint32_t inst)
                        naja->ip  += 4;
 
                if (0 == cc)
-                       printf("jz     %#lx, flags: %#lx\n", naja->ip, naja->flags);
+                       NAJA_PRINTF("jz     %#lx, flags: %#lx\n", naja->ip, naja->flags);
 
                else if (1 == cc)
-                       printf("jnz    %#lx, flags: %#lx\n", naja->ip, naja->flags);
+                       NAJA_PRINTF("jnz    %#lx, flags: %#lx\n", naja->ip, naja->flags);
 
                else if (2 == cc)
-                       printf("jge    %#lx, flags: %#lx\n", naja->ip, naja->flags);
+                       NAJA_PRINTF("jge    %#lx, flags: %#lx\n", naja->ip, naja->flags);
 
                else if (3 == cc)
-                       printf("jgt    %#lx, flags: %#lx\n", naja->ip, naja->flags);
+                       NAJA_PRINTF("jgt    %#lx, flags: %#lx\n", naja->ip, naja->flags);
 
                else if (4 == cc)
-                       printf("jle    %#lx, flags: %#lx\n", naja->ip, naja->flags);
+                       NAJA_PRINTF("jle    %#lx, flags: %#lx\n", naja->ip, naja->flags);
 
                else if (5 == cc)
-                       printf("jlt    %#lx, flags: %#lx\n", naja->ip, naja->flags);
+                       NAJA_PRINTF("jlt    %#lx, flags: %#lx\n", naja->ip, naja->flags);
                else {
                        scf_loge("\n");
                        return -EINVAL;
@@ -1198,7 +1435,7 @@ static int __naja_jmp_reg(scf_vm_t* vm, uint32_t inst)
 
                if (naja_vm_dynamic_link == (void*)naja->regs[rd]) {
 
-                       printf("\033[36mjmp    r%d, %#lx@plt\033[0m\n", rd, naja->regs[rd]);
+                       NAJA_PRINTF("\033[36mjmp    r%d, %#lx@plt\033[0m\n", rd, naja->regs[rd]);
 
                        int ret = naja_vm_dynamic_link(vm);
                        if (ret < 0) {
@@ -1211,24 +1448,32 @@ static int __naja_jmp_reg(scf_vm_t* vm, uint32_t inst)
                } else if (naja->regs[rd] < vm->text->addr
                                || naja->regs[rd] > vm->text->addr + vm->text->len) {
 
-                       printf("\033[36mjmp    r%d, %#lx@plt\033[0m\n", rd, naja->regs[rd]);
+                       NAJA_PRINTF("\033[36mjmp    r%d, %#lx@plt\033[0m\n", rd, naja->regs[rd]);
 
-                       naja_dyn_func_pt pt = (naja_dyn_func_pt) naja->regs[rd];
+                       dyn_func_pt f = (void*)naja->regs[rd];
 
-                       naja->regs[0] = pt(naja->regs[0],
+                       naja->regs[0] = f(naja->regs[0],
                                        naja->regs[1],
                                        naja->regs[2],
                                        naja->regs[3],
                                        naja->regs[4],
                                        naja->regs[5],
                                        naja->regs[6],
-                                       naja->regs[7]);
+                                       naja->regs[7],
+                                       naja->fvec[0].d[0],
+                                       naja->fvec[1].d[0],
+                                       naja->fvec[2].d[0],
+                                       naja->fvec[3].d[0],
+                                       naja->fvec[4].d[0],
+                                       naja->fvec[5].d[0],
+                                       naja->fvec[6].d[0],
+                                       naja->fvec[7].d[0]);
 
                        naja->ip = naja->regs[NAJA_REG_LR];
                } else {
                        naja->ip = naja->regs[rd];
 
-                       printf("jmp    r%d, %#lx\n", rd, naja->regs[rd]);
+                       NAJA_PRINTF("jmp    r%d, %#lx\n", rd, naja->regs[rd]);
                }
        }
 
@@ -1246,7 +1491,7 @@ static int __naja_call_reg(scf_vm_t* vm, uint32_t inst)
 
        if (naja_vm_dynamic_link == (void*)naja->regs[rd]) {
 
-               printf("\033[36mcall  r%d, %#lx@plt\033[0m\n", rd, naja->regs[rd]);
+               NAJA_PRINTF("\033[36mcall  r%d, %#lx@plt\033[0m\n", rd, naja->regs[rd]);
 
                int ret = naja_vm_dynamic_link(vm);
                if (ret < 0) {
@@ -1259,22 +1504,30 @@ static int __naja_call_reg(scf_vm_t* vm, uint32_t inst)
        } else if (naja->regs[rd] < vm->text->addr
                        || naja->regs[rd] > vm->text->addr + vm->text->len) {
 
-               printf("\033[36mcall  r%d, %#lx@plt\033[0m\n", rd, naja->regs[rd]);
-
-               naja_dyn_func_pt pt = (naja_dyn_func_pt) naja->regs[rd];
-
-               naja->regs[0] = pt(naja->regs[0],
-                                          naja->regs[1],
-                                          naja->regs[2],
-                                          naja->regs[3],
-                                          naja->regs[4],
-                                          naja->regs[5],
-                                          naja->regs[6],
-                                          naja->regs[7]);
+               NAJA_PRINTF("\033[36mcall  r%d, %#lx@plt\033[0m\n", rd, naja->regs[rd]);
+
+               dyn_func_pt f = (void*)naja->regs[rd];
+
+               naja->regs[0]  = f(naja->regs[0],
+                               naja->regs[1],
+                               naja->regs[2],
+                               naja->regs[3],
+                               naja->regs[4],
+                               naja->regs[5],
+                               naja->regs[6],
+                               naja->regs[7],
+                               naja->fvec[0].d[0],
+                               naja->fvec[1].d[0],
+                               naja->fvec[2].d[0],
+                               naja->fvec[3].d[0],
+                               naja->fvec[4].d[0],
+                               naja->fvec[5].d[0],
+                               naja->fvec[6].d[0],
+                               naja->fvec[7].d[0]);
 
                naja->ip = naja->regs[NAJA_REG_LR];
        } else {
-               printf("call  r%d, %#lx\n", rd, naja->regs[rd]);
+               NAJA_PRINTF("call  r%d, %#lx\n", rd, naja->regs[rd]);
                naja->ip = naja->regs[rd];
        }
 
@@ -1302,7 +1555,7 @@ static int __naja_adrp(scf_vm_t* vm, uint32_t inst)
        else if (naja->regs[rd] >= 0x400000)
                naja->regs[rd]  = naja->regs[rd] - vm->text->addr + (uint64_t)vm->text->data;
 
-       printf("adrp   r%d, [rip, %d],  %#lx\n", rd, s21, naja->regs[rd]);
+       NAJA_PRINTF("adrp   r%d, [rip, %d],  %#lx\n", rd, s21, naja->regs[rd]);
 
        naja->ip += 4;
        return 0;
@@ -1329,7 +1582,7 @@ static int __naja_ret(scf_vm_t* vm, uint32_t inst)
                naja->size  = sp + STACK_INC;
        }
 
-       printf("ret,   %#lx, sp: %ld, stack->size: %ld\n", naja->ip, sp, naja->size);
+       NAJA_PRINTF("ret,   %#lx, sp: %ld, stack->size: %ld\n", naja->ip, sp, naja->size);
        return 0;
 }
 
@@ -1343,22 +1596,22 @@ static int __naja_setcc(scf_vm_t* vm, uint32_t inst)
        naja->regs[rd] = 0 == (cc & naja->flags);
 
        if (SCF_VM_Z == cc)
-               printf("setz   r%d\n", rd);
+               NAJA_PRINTF("setz   r%d\n", rd);
 
        else if (SCF_VM_NZ == cc)
-               printf("setnz  r%d\n", rd);
+               NAJA_PRINTF("setnz  r%d\n", rd);
 
        else if (SCF_VM_GE == cc)
-               printf("setge  r%d\n", rd);
+               NAJA_PRINTF("setge  r%d\n", rd);
 
        else if (SCF_VM_GT == cc)
-               printf("setgt  r%d\n", rd);
+               NAJA_PRINTF("setgt  r%d\n", rd);
 
        else if (SCF_VM_LT == cc)
-               printf("setlt  r%d\n", rd);
+               NAJA_PRINTF("setlt  r%d\n", rd);
 
        else if (SCF_VM_LE == cc)
-               printf("setle  r%d\n", rd);
+               NAJA_PRINTF("setle  r%d\n", rd);
        else {
                scf_loge("inst: %#x\n", inst);
                return -EINVAL;
@@ -1383,30 +1636,30 @@ static int __naja_mov(scf_vm_t* vm, uint32_t inst)
 
                        if (X && (inst & 0x8000)) {
                                naja->regs[rd] |= ~0xffffULL;
-                               printf("movsb  r%d, %d\n", rd, inst & 0xffff);
+                               NAJA_PRINTF("movsb  r%d, %d\n", rd, inst & 0xffff);
                        } else {
-                               printf("mov    r%d, %d\n", rd, inst & 0xffff);
+                               NAJA_PRINTF("mov    r%d, %d\n", rd, inst & 0xffff);
                        }
 
                } else if (1 == opt) {
                        naja->regs[rd] |= (inst & 0xffffULL) << 16;
 
-                       printf("mov    r%d, %d << 16\n", rd, inst & 0xffff);
+                       NAJA_PRINTF("mov    r%d, %d << 16\n", rd, inst & 0xffff);
 
                } else if (2 == opt) {
                        naja->regs[rd] |= (inst & 0xffffULL) << 32;
 
-                       printf("mov    r%d, %d << 32\n", rd, inst & 0xffff);
+                       NAJA_PRINTF("mov    r%d, %d << 32\n", rd, inst & 0xffff);
 
                } else if (3 == opt) {
                        naja->regs[rd] |= (inst & 0xffffULL) << 48;
 
-                       printf("mov    r%d, %d << 48\n", rd, inst & 0xffff);
+                       NAJA_PRINTF("mov    r%d, %d << 48\n", rd, inst & 0xffff);
 
                } else if (7 == opt) {
                        naja->regs[rd] = ~(inst & 0xffffULL);
 
-                       printf("mvn    r%d, %d\n", rd, inst & 0xffff);
+                       NAJA_PRINTF("mvn    r%d, %d\n", rd, inst & 0xffff);
                }
 
        } else {
@@ -1416,82 +1669,82 @@ static int __naja_mov(scf_vm_t* vm, uint32_t inst)
 
                if (0 == opt) {
                        if (X) {
-                               printf("mov    r%d, r%d LSL r%d\n", rd, rs, rs1);
+                               NAJA_PRINTF("mov    r%d, r%d LSL r%d\n", rd, rs, rs1);
 
                                naja->regs[rd] = naja->regs[rs] << naja->regs[rs1];
                        } else {
                                naja->regs[rd] = naja->regs[rs] << u11;
 
                                if (0 == u11)
-                                       printf("mov    r%d, r%d\n", rd, rs);
+                                       NAJA_PRINTF("mov    r%d, r%d\n", rd, rs);
                                else
-                                       printf("mov    r%d, r%d LSL %d\n", rd, rs, u11);
+                                       NAJA_PRINTF("mov    r%d, r%d LSL %d\n", rd, rs, u11);
                        }
 
                } else if (1 == opt) {
                        if (X) {
-                               printf("mov    r%d, r%d LSR r%d\n", rd, rs, rs1);
+                               NAJA_PRINTF("mov    r%d, r%d LSR r%d\n", rd, rs, rs1);
 
                                naja->regs[rd] = naja->regs[rs] >> naja->regs[rs1];
                        } else {
                                naja->regs[rd] = naja->regs[rs] >> u11;
 
                                if (0 == u11)
-                                       printf("mov    r%d, r%d\n", rd, rs);
+                                       NAJA_PRINTF("mov    r%d, r%d\n", rd, rs);
                                else
-                                       printf("mov    r%d, r%d LSR %d\n", rd, rs, u11);
+                                       NAJA_PRINTF("mov    r%d, r%d LSR %d\n", rd, rs, u11);
                        }
                } else if (2 == opt) {
                        if (X) {
-                               printf("mov    r%d, r%d ASR r%d\n", rd, rs, rs1);
+                               NAJA_PRINTF("mov    r%d, r%d ASR r%d\n", rd, rs, rs1);
 
                                naja->regs[rd] = (int64_t)naja->regs[rs] >> naja->regs[rs1];
                        } else {
                                naja->regs[rd] = (int64_t)naja->regs[rs] >> u11;
 
                                if (0 == u11)
-                                       printf("mov    r%d, r%d\n", rd, rs);
+                                       NAJA_PRINTF("mov    r%d, r%d\n", rd, rs);
                                else
-                                       printf("mov    r%d, r%d ASR %d\n", rd, rs, u11);
+                                       NAJA_PRINTF("mov    r%d, r%d ASR %d\n", rd, rs, u11);
                        }
                } else if (3 == opt) {
-                       printf("NOT    r%d, r%d\n", rd, rs);
+                       NAJA_PRINTF("NOT    r%d, r%d\n", rd, rs);
 
                        naja->regs[rd] = ~naja->regs[rs];
                } else if (4 == opt) {
                        naja->regs[rd] = -naja->regs[rs];
 
-                       printf("NEG    r%d, r%d\n", rd, rs);
+                       NAJA_PRINTF("NEG    r%d, r%d\n", rd, rs);
 
                } else if (5 == opt) {
                        if (X) {
-                               printf("movsb  r%d, r%d\n", rd, rs);
+                               NAJA_PRINTF("movsb  r%d, r%d\n", rd, rs);
 
                                naja->regs[rd] = (int8_t)naja->regs[rs];
                        } else {
                                naja->regs[rd] = (uint8_t)naja->regs[rs];
 
-                               printf("movzb  r%d, r%d\n", rd, rs);
+                               NAJA_PRINTF("movzb  r%d, r%d\n", rd, rs);
                        }
                } else if (6 == opt) {
                        if (X) {
-                               printf("movsw  r%d, r%d\n", rd, rs);
+                               NAJA_PRINTF("movsw  r%d, r%d\n", rd, rs);
 
                                naja->regs[rd] = (int16_t)naja->regs[rs];
                        } else {
                                naja->regs[rd] = (uint16_t)naja->regs[rs];
 
-                               printf("movzw  r%d, r%d\n", rd, rs);
+                               NAJA_PRINTF("movzw  r%d, r%d\n", rd, rs);
                        }
                } else if (7 == opt) {
                        if (X) {
-                               printf("movsl  r%d, r%d\n", rd, rs);
+                               NAJA_PRINTF("movsl  r%d, r%d\n", rd, rs);
 
                                naja->regs[rd] = (int32_t)naja->regs[rs];
                        } else {
                                naja->regs[rd] = (uint32_t)naja->regs[rs];
 
-                               printf("movzl  r%d, r%d\n", rd, rs);
+                               NAJA_PRINTF("movzl  r%d, r%d\n", rd, rs);
                        }
                }
        }
@@ -1500,6 +1753,82 @@ static int __naja_mov(scf_vm_t* vm, uint32_t inst)
        return 0;
 }
 
+static int __naja_fmov(scf_vm_t* vm, uint32_t inst)  // opcode 31 handler: float move / negate / int<->float conversions selected by opt; returns 0, or -EINVAL for an unknown opt
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rs  =  inst & 0x1f;
+       int rd  = (inst >> 21) & 0x1f;
+       int opt = (inst >> 16) & 0xf;  // sub-operation selector, instruction bits 16..19
+
+       if (0 == opt) {
+               naja->fvec[rd].d[0] = naja->fvec[rs].d[0];
+
+               NAJA_PRINTF("fmov     d%d, d%d\n", rd, rs);
+
+       } else if (1 == opt) {
+               naja->fvec[rd].d[0] = naja->fvec[rs].d[0];  // plain copy of d[0]; no precision change is performed here
+
+               NAJA_PRINTF("fss2sd   d%d, f%d\n", rd, rs);
+
+       } else if (2 == opt) {
+               naja->fvec[rd].d[0] = naja->fvec[rs].d[0];  // plain copy of d[0]; no rounding to single precision is performed here
+
+               NAJA_PRINTF("fsd2ss   f%d, d%d\n", rd, rs);
+
+       } else if (3 == opt) {
+               naja->fvec[rd].d[0] = -naja->fvec[rs].d[0];
+
+               NAJA_PRINTF("fneg     d%d, d%d\n", rd, rs);
+
+       } else if (4 == opt) {
+               naja->regs[rd] = (int64_t)naja->fvec[rs].d[0];  // float -> signed integer
+
+               NAJA_PRINTF("cvtss2si r%d, f%d\n", rd, rs);
+
+       } else if (5 == opt) {
+               naja->regs[rd] = (int64_t)naja->fvec[rs].d[0];  // double -> signed integer
+
+               NAJA_PRINTF("cvtsd2si r%d, d%d\n", rd, rs);
+
+       } else if (6 == opt) {
+               naja->regs[rd] = (uint64_t)naja->fvec[rs].d[0];  // float -> unsigned integer
+
+               NAJA_PRINTF("cvtss2ui r%d, f%d\n", rd, rs);
+
+       } else if (7 == opt) {
+               naja->regs[rd] = (uint64_t)naja->fvec[rs].d[0];  // double -> unsigned integer
+
+               NAJA_PRINTF("cvtsd2ui r%d, d%d\n", rd, rs);
+
+       } else if (0xc == opt) {
+               naja->fvec[rd].d[0] = (double)(int64_t)naja->regs[rs];  // signed source: cast first so negative integers stay negative
+
+               NAJA_PRINTF("cvtsi2ss f%d, r%d\n", rd, rs);
+
+       } else if (0xd == opt) {
+               naja->fvec[rd].d[0] = (double)(int64_t)naja->regs[rs];  // signed source: cast first so negative integers stay negative
+
+               NAJA_PRINTF("cvtsi2sd d%d, r%d\n", rd, rs);
+
+       } else if (0xe == opt) {
+               naja->fvec[rd].d[0] = (double)(uint64_t)naja->regs[rs];  // unsigned source, made explicit
+
+               NAJA_PRINTF("cvtui2ss f%d, r%d\n", rd, rs);
+
+       } else if (0xf == opt) {
+               naja->fvec[rd].d[0] = (double)(uint64_t)naja->regs[rs];  // unsigned source, made explicit
+
+               NAJA_PRINTF("cvtui2sd d%d, r%d\n", rd, rs);
+       } else {
+               scf_loge("\n");
+               return -EINVAL;
+       }
+
+       naja->ip += 4;  // advance past this 4-byte instruction
+       return 0;
+}
+
 static naja_opcode_pt  naja_opcodes[64] =
 {
        __naja_add,      // 0
@@ -1519,22 +1848,22 @@ static naja_opcode_pt  naja_opcodes[64] =
        __naja_teq,      //14
        __naja_mov,      //15
 
-       NULL,            //16
-       NULL,            //17
-       NULL,            //18
-       NULL,            //19
-       NULL,            //20
-       NULL,            //21
+       __naja_fadd,     //16
+       __naja_fsub,     //17
+       __naja_fmul,     //18
+       __naja_fdiv,     //19
+       __naja_fldr_disp,//20
+       __naja_fstr_disp,//21
        NULL,            //22
        NULL,            //23
        __naja_call_disp,//24
-       NULL,            //25
+       __naja_fcmp,     //25
        __naja_call_reg, //26
        NULL,            //27
-       NULL,            //28
-       NULL,            //29
+       __naja_fldr_sib, //28
+       __naja_fstr_sib, //29
        NULL,            //30
-       NULL,            //31
+       __naja_fmov,     //31
 
        NULL,            //32
        NULL,            //33
@@ -1638,17 +1967,17 @@ static int __naja_vm_run(scf_vm_t* vm, const char* path, const char* sys)
                        return -EINVAL;
                }
 
-               printf("%4d, %#lx: ", n++, naja->ip);
+               NAJA_PRINTF("%4d, %#lx: ", n++, naja->ip);
                ret = pt(vm, inst);
                if (ret < 0) {
                        scf_loge("\n");
                        return ret;
                }
 
-               usleep(50 * 1000);
+//             usleep(10 * 1000);
        }
 
-       scf_logw("r0: %ld\n", naja->regs[0]);
+       scf_logw("r0: %ld, sizeof(fv256_t): %ld\n", naja->regs[0], sizeof(fv256_t));
        return naja->regs[0];
 }
 
index 828096ec6e4ed426ced36aa2b7cc76f471218a35..5a2dd0fc255101233436a8d66bb1ddf96d1c5a41 100644 (file)
@@ -1,9 +1,5 @@
 #include"scf_vm.h"
 
-#define NAJA_REG_FP   29
-#define NAJA_REG_LR   30
-#define NAJA_REG_SP   31
-
 static int __naja_add(scf_vm_t* vm, uint32_t inst)
 {
        scf_vm_naja_t* naja = vm->priv;
@@ -14,18 +10,18 @@ static int __naja_add(scf_vm_t* vm, uint32_t inst)
 
        if (I) {
                uint64_t uimm15 = (inst >> 5) & 0x7fff;
-               printf("add    r%d, r%d, %lu\n", rd, rs0, uimm15);
+               printf("add      r%d, r%d, %lu\n", rd, rs0, uimm15);
        } else {
                uint64_t sh     = (inst >> 18) & 0x3;
                uint64_t uimm8  = (inst >> 10) & 0xff;
                int      rs1    = (inst >>  5) & 0x1f;
 
                if (0 == sh)
-                       printf("add    r%d, r%d, r%d LSL %lu\n", rd, rs0, rs1, uimm8);
+                       printf("add      r%d, r%d, r%d LSL %lu\n", rd, rs0, rs1, uimm8);
                else if (1 == sh)
-                       printf("add    r%d, r%d, r%d LSR %lu\n", rd, rs0, rs1, uimm8);
+                       printf("add      r%d, r%d, r%d LSR %lu\n", rd, rs0, rs1, uimm8);
                else
-                       printf("add    r%d, r%d, r%d ASR %lu\n", rd, rs0, rs1, uimm8);
+                       printf("add      r%d, r%d, r%d ASR %lu\n", rd, rs0, rs1, uimm8);
        }
 
        return 0;
@@ -41,18 +37,18 @@ static int __naja_sub(scf_vm_t* vm, uint32_t inst)
 
        if (I) {
                uint64_t uimm15 = (inst >> 5) & 0x7fff;
-               printf("sub    r%d, r%d, %lu\n", rd, rs0, uimm15);
+               printf("sub      r%d, r%d, %lu\n", rd, rs0, uimm15);
        } else {
                uint64_t sh     = (inst >> 18) & 0x3;
                uint64_t uimm8  = (inst >> 10) & 0xff;
                int      rs1    = (inst >>  5) & 0x1f;
 
                if (0 == sh)
-                       printf("sub    r%d, r%d, r%d << %lu\n", rd, rs0, rs1, uimm8);
+                       printf("sub      r%d, r%d, r%d << %lu\n", rd, rs0, rs1, uimm8);
                else if (1 == sh)
-                       printf("sub    r%d, r%d, r%d LSR %lu\n", rd, rs0, rs1, uimm8);
+                       printf("sub      r%d, r%d, r%d LSR %lu\n", rd, rs0, rs1, uimm8);
                else
-                       printf("sub    r%d, r%d, r%d ASR %lu\n", rd, rs0, rs1, uimm8);
+                       printf("sub      r%d, r%d, r%d ASR %lu\n", rd, rs0, rs1, uimm8);
        }
 
        return 0;
@@ -63,25 +59,24 @@ static int __naja_cmp(scf_vm_t* vm, uint32_t inst)
        scf_vm_naja_t* naja = vm->priv;
 
        int rs0 =  inst        & 0x1f;
-       int rd  = (inst >> 21) & 0x1f;
        int I   = (inst >> 20) & 0x1;
 
        int ret = 0;
 
        if (I) {
-               uint64_t uimm15 = (inst >> 5) & 0x7fff;
-               printf("cmp    r%d, %ld,  rs0: %lx, ret: %d\n", rs0, uimm15, naja->regs[rs0], ret);
+               int uimm15 = (inst >> 5) & 0x7fff;
+               printf("cmp      r%d, %d\n", rs0, uimm15);
        } else {
-               uint64_t sh     = (inst >> 18) & 0x3;
-               uint64_t uimm8  = (inst >> 10) & 0xff;
-               int      rs1    = (inst >>  5) & 0x1f;
+               int sh     = (inst >> 18) & 0x3;
+               int uimm8  = (inst >> 10) & 0xff;
+               int rs1    = (inst >>  5) & 0x1f;
 
                if (0 == sh)
-                       printf("cmp    r%d, r%d LSL %ld,  rs0: %#lx, rs1: %#lx, ret: %d\n", rs0, rs1, uimm8, naja->regs[rs0], naja->regs[rs1], ret);
+                       printf("cmp      r%d, r%d LSL %d\n", rs0, rs1, uimm8);
                else if (1 == sh)
-                       printf("cmp    r%d, r%d LSR %ld,  rs0: %#lx, rs1: %#lx, ret: %d\n", rs0, rs1, uimm8, naja->regs[rs0], naja->regs[rs1], ret);
+                       printf("cmp      r%d, r%d LSR %d\n", rs0, rs1, uimm8);
                else
-                       printf("cmp    r%d, r%d ASR %ld,  rs0: %#lx, rs1: %ld, ret: %d\n", rs0, rs1, uimm8, naja->regs[rs0], naja->regs[rs1], ret);
+                       printf("cmp      r%d, r%d ASR %d\n", rs0, rs1, uimm8);
        }
 
        return 0;
@@ -100,18 +95,18 @@ static int __naja_mul(scf_vm_t* vm, uint32_t inst)
 
        if (S) {
                if (0 == opt)
-                       printf("smadd  r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
+                       printf("smadd    r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
                else if (1 == opt)
-                       printf("smsub  r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
+                       printf("smsub    r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
                else
-                       printf("smul   r%d, r%d, r%d\n", rd, rs0, rs1);
+                       printf("smul     r%d, r%d, r%d\n", rd, rs0, rs1);
        } else {
                if (0 == opt)
-                       printf("madd   r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
+                       printf("madd     r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
                else if (1 == opt)
-                       printf("msub   r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
+                       printf("msub     r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
                else
-                       printf("mul    r%d, r%d, r%d\n", rd, rs0, rs1);
+                       printf("mul      r%d, r%d, r%d\n", rd, rs0, rs1);
        }
 
        return 0;
@@ -130,18 +125,18 @@ static int __naja_div(scf_vm_t* vm, uint32_t inst)
 
        if (S) {
                if (0 == opt)
-                       printf("sdadd  r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
+                       printf("sdadd    r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
                else if (1 == opt)
-                       printf("sdsub  r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
+                       printf("sdsub    r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
                else
-                       printf("sdiv   r%d, r%d, r%d\n", rd, rs0, rs1);
+                       printf("sdiv     r%d, r%d, r%d\n", rd, rs0, rs1);
        } else {
                if (0 == opt)
-                       printf("dadd   r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
+                       printf("dadd     r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
                else if (1 == opt)
-                       printf("dsub   r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
+                       printf("dsub     r%d, r%d, r%d, r%d\n", rd, rs2, rs0, rs1);
                else
-                       printf("div    r%d, r%d, r%d\n", rd, rs0, rs1);
+                       printf("div      r%d, r%d, r%d\n", rd, rs0, rs1);
        }
 
        return 0;
@@ -163,51 +158,51 @@ static int __naja_ldr_disp(scf_vm_t* vm, uint32_t inst)
        switch (ext) {
                case 0:
                        if (A)
-                               printf("ldrb   r%d, [r%d, %d]!\n", rd, rb, s12);
+                               printf("ldrb     r%d, [r%d, %d]!\n", rd, rb, s12);
                        else
-                               printf("ldrb   r%d, [r%d, %d]\n", rd, rb, s12);
+                               printf("ldrb     r%d, [r%d, %d]\n", rd, rb, s12);
                        break;
 
                case 1:
                        if (A)
-                               printf("ldrw   r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
+                               printf("ldrw     r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
                        else
-                               printf("ldrw   r%d, [r%d, %d]\n", rd, rb, s12 << 1);
+                               printf("ldrw     r%d, [r%d, %d]\n", rd, rb, s12 << 1);
                        break;
 
                case 2:
                        if (A)
-                               printf("ldrl   r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
+                               printf("ldrl     r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
                        else
-                               printf("ldrl   r%d, [r%d, %d]\n", rd, rb, s12 << 2);
+                               printf("ldrl     r%d, [r%d, %d]\n", rd, rb, s12 << 2);
                        break;
 
                case 3:
                        if (A)
-                               printf("ldr    r%d, [r%d, %d]!\n", rd, rb, s12 << 3);
+                               printf("ldr      r%d, [r%d, %d]!\n", rd, rb, s12 << 3);
                        else
-                               printf("ldr    r%d, [r%d, %d]\n", rd, rb, s12 << 3);
+                               printf("ldr      r%d, [r%d, %d]\n", rd, rb, s12 << 3);
                        break;
 
                case 4:
                        if (A)
-                               printf("ldrsb  r%d, [r%d, %d]!\n", rd, rb, s12);
+                               printf("ldrsb    r%d, [r%d, %d]!\n", rd, rb, s12);
                        else
-                               printf("ldrsb  r%d, [r%d, %d]\n", rd, rb, s12);
+                               printf("ldrsb    r%d, [r%d, %d]\n", rd, rb, s12);
                        break;
 
                case 5:
                        if (A)
-                               printf("ldrsw  r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
+                               printf("ldrsw    r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
                        else
-                               printf("ldrsw  r%d, [r%d, %d]\n", rd, rb, s12 << 1);
+                               printf("ldrsw    r%d, [r%d, %d]\n", rd, rb, s12 << 1);
                        break;
 
                case 6:
                        if (A)
-                               printf("ldrsl  r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
+                               printf("ldrsl    r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
                        else
-                               printf("ldrsl  r%d, [r%d, %d]\n", rd, rb, s12 << 2);
+                               printf("ldrsl    r%d, [r%d, %d]\n", rd, rb, s12 << 2);
                        break;
                default:
                        scf_loge("\n");
@@ -230,31 +225,31 @@ static int __naja_ldr_sib(scf_vm_t* vm, uint32_t inst)
 
        switch (ext) {
                case 0:
-                       printf("ldrb  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("ldrb    r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                case 1:
-                       printf("ldrw  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("ldrw    r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                case 2:
-                       printf("ldrl  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("ldrl    r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                case 3:
-                       printf("ldr   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("ldr     r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                case 4:
-                       printf("ldrsb r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("ldrsb   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                case 5:
-                       printf("ldrsw r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("ldrsw   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                case 6:
-                       printf("ldrsl r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("ldrsl   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
                default:
                        scf_loge("\n");
@@ -281,30 +276,30 @@ static int __naja_str_disp(scf_vm_t* vm, uint32_t inst)
        switch (ext) {
                case 0:
                        if (A)
-                               printf("strb   r%d, [r%d, %d]!\n", rd, rb, s12);
+                               printf("strb     r%d, [r%d, %d]!\n", rd, rb, s12);
                        else
-                               printf("strb   r%d, [r%d, %d]\n", rd, rb, s12);
+                               printf("strb     r%d, [r%d, %d]\n", rd, rb, s12);
                        break;
 
                case 1:
                        if (A)
-                               printf("strw   r%d, [r%d, %d]!\n", rd, rb, s12);
+                               printf("strw     r%d, [r%d, %d]!\n", rd, rb, s12 << 1);
                        else
-                               printf("strw   r%d, [r%d, %d]\n", rd, rb, s12);
+                               printf("strw     r%d, [r%d, %d]\n", rd, rb, s12 << 1);
                        break;
 
                case 2:
                        if (A)
-                               printf("strl   r%d, [r%d, %d]!\n", rd, rb, s12);
+                               printf("strl     r%d, [r%d, %d]!\n", rd, rb, s12 << 2);
                        else
-                               printf("strl   r%d, [r%d, %d]\n", rd, rb, s12);
+                               printf("strl     r%d, [r%d, %d]\n", rd, rb, s12 << 2);
                        break;
 
                case 3:
                        if (A)
-                               printf("str    r%d, [r%d, %d]!\n", rd, rb, s12 << 3);
+                               printf("str      r%d, [r%d, %d]!\n", rd, rb, s12 << 3);
                        else
-                               printf("str    r%d, [r%d, %d]\n", rd, rb, s12 << 3);
+                               printf("str      r%d, [r%d, %d]\n", rd, rb, s12 << 3);
                        break;
 
                default:
@@ -328,19 +323,19 @@ static int __naja_str_sib(scf_vm_t* vm, uint32_t inst)
 
        switch (ext) {
                case 0:
-                       printf("strb  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("strb    r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                case 1:
-                       printf("strw  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("strw    r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                case 2:
-                       printf("strl  r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("strl    r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                case 3:
-                       printf("str   r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       printf("str     r%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
                        break;
 
                default:
@@ -363,18 +358,18 @@ static int __naja_and(scf_vm_t* vm, uint32_t inst)
        if (I) {
                uint64_t uimm15 = (inst >> 5) & 0x7fff;
 
-               printf("and   r%d, r%d, %#lx\n", rd, rs0, uimm15);
+               printf("and     r%d, r%d, %#lx\n", rd, rs0, uimm15);
        } else {
                int sh     = (inst >> 18) & 0x3;
                int uimm8  = (inst >> 10) & 0xff;
                int rs1    = (inst >>  5) & 0x1f;
 
                if (0 == sh)
-                       printf("and   r%d, r%d, r%d LSL %#x\n", rd, rs0, rs1, uimm8);
+                       printf("and     r%d, r%d, r%d LSL %#x\n", rd, rs0, rs1, uimm8);
                else if (1 == sh)
-                       printf("and   r%d, r%d, r%d LSR %#x\n", rd, rs0, rs1, uimm8);
+                       printf("and     r%d, r%d, r%d LSR %#x\n", rd, rs0, rs1, uimm8);
                else
-                       printf("and   r%d, r%d, r%d ASR %#x\n", rd, rs0, rs1, uimm8);
+                       printf("and     r%d, r%d, r%d ASR %#x\n", rd, rs0, rs1, uimm8);
        }
 
        return 0;
@@ -391,18 +386,18 @@ static int __naja_teq(scf_vm_t* vm, uint32_t inst)
        if (I) {
                uint64_t uimm15 = (inst >> 5) & 0x7fff;
 
-               printf("teq   r%d, %#lx\n", rs0, uimm15);
+               printf("teq     r%d, %#lx\n", rs0, uimm15);
        } else {
                int sh     = (inst >> 18) & 0x3;
                int uimm8  = (inst >> 10) & 0xff;
                int rs1    = (inst >>  5) & 0x1f;
 
                if (0 == sh)
-                       printf("teq   r%d, r%d LSL %#x\n", rs0, rs1, uimm8);
+                       printf("teq     r%d, r%d LSL %#x\n", rs0, rs1, uimm8);
                else if (1 == sh)
-                       printf("teq   r%d, r%d LSR %#x\n", rs0, rs1, uimm8);
+                       printf("teq     r%d, r%d LSR %#x\n", rs0, rs1, uimm8);
                else
-                       printf("teq   r%d, r%d ASR %#x\n", rs0, rs1, uimm8);
+                       printf("teq     r%d, r%d ASR %#x\n", rs0, rs1, uimm8);
        }
 
        return 0;
@@ -419,18 +414,18 @@ static int __naja_or(scf_vm_t* vm, uint32_t inst)
        if (I) {
                uint64_t uimm15 = (inst >> 5) & 0x7fff;
 
-               printf("or    r%d, r%d, %#lx\n", rd, rs0, uimm15);
+               printf("or      r%d, r%d, %#lx\n", rd, rs0, uimm15);
        } else {
                int sh     = (inst >> 18) & 0x3;
                int uimm8  = (inst >> 10) & 0xff;
                int rs1    = (inst >>  5) & 0x1f;
 
                if (0 == sh)
-                       printf("or    r%d, r%d, r%d LSL %#x\n", rd, rs0, rs1, uimm8);
+                       printf("or      r%d, r%d, r%d LSL %#x\n", rd, rs0, rs1, uimm8);
                else if (1 == sh)
-                       printf("or    r%d, r%d, r%d LSR %#x\n", rd, rs0, rs1, uimm8);
+                       printf("or      r%d, r%d, r%d LSR %#x\n", rd, rs0, rs1, uimm8);
                else
-                       printf("or    r%d, r%d, r%d ASR %#x\n", rd, rs0, rs1, uimm8);
+                       printf("or      r%d, r%d, r%d ASR %#x\n", rd, rs0, rs1, uimm8);
        }
 
        return 0;
@@ -460,7 +455,7 @@ static int __naja_call_disp(scf_vm_t* vm, uint32_t inst)
                simm26 |= 0xfc000000;
 
        uint64_t ip = naja->ip + (simm26 << 2);
-       printf("call   %#lx\n", ip);
+       printf("call     %#lx\n", ip);
        return 0;
 }
 
@@ -481,22 +476,22 @@ static int __naja_jmp_reg(scf_vm_t* vm, uint32_t inst)
                uint64_t ip = naja->ip + s21;
 
                if (0 == cc)
-                       printf("jz     %#lx\n", ip);
+                       printf("jz       %#lx\n", ip);
 
                else if (1 == cc)
-                       printf("jnz    %#lx\n", ip);
+                       printf("jnz      %#lx\n", ip);
 
                else if (2 == cc)
-                       printf("jge    %#lx\n", ip);
+                       printf("jge      %#lx\n", ip);
 
                else if (3 == cc)
-                       printf("jgt    %#lx\n", ip);
+                       printf("jgt      %#lx\n", ip);
 
                else if (4 == cc)
-                       printf("jle    %#lx\n", ip);
+                       printf("jle      %#lx\n", ip);
 
                else if (5 == cc)
-                       printf("jlt    %#lx\n", ip);
+                       printf("jlt      %#lx\n", ip);
                else {
                        scf_loge("\n");
                        return -EINVAL;
@@ -505,7 +500,7 @@ static int __naja_jmp_reg(scf_vm_t* vm, uint32_t inst)
        } else {
                int rd = (inst >> 21) & 0x1f;
 
-               printf("jmp    *r%d\n", rd);
+               printf("jmp      *r%d\n", rd);
        }
        return 0;
 }
@@ -517,7 +512,7 @@ static int __naja_call_reg(scf_vm_t* vm, uint32_t inst)
 
        int rd = (inst >> 21) & 0x1f;
 
-       printf("call  r%d\n", rd);
+       printf("call    r%d\n", rd);
 
        return 0;
 }
@@ -532,7 +527,7 @@ static int __naja_adrp(scf_vm_t* vm, uint32_t inst)
        if (s21  & 0x100000)
                s21 |= ~0x1fffff;
 
-       printf("adrp   r%d, [rip, %d]\n", rd, s21);
+       printf("adrp     r%d, [rip, %d]\n", rd, s21);
 
        return 0;
 }
@@ -553,22 +548,22 @@ static int __naja_setcc(scf_vm_t* vm, uint32_t inst)
        int cc = (inst >> 17) & 0xf;
 
        if (SCF_VM_Z == cc)
-               printf("setz   r%d\n", rd);
+               printf("setz     r%d\n", rd);
 
        else if (SCF_VM_NZ == cc)
-               printf("setnz  r%d\n", rd);
+               printf("setnz    r%d\n", rd);
 
        else if (SCF_VM_GE == cc)
-               printf("setge  r%d\n", rd);
+               printf("setge    r%d\n", rd);
 
        else if (SCF_VM_GT == cc)
-               printf("setgt  r%d\n", rd);
+               printf("setgt    r%d\n", rd);
 
        else if (SCF_VM_LT == cc)
-               printf("setlt  r%d\n", rd);
+               printf("setlt    r%d\n", rd);
 
        else if (SCF_VM_LE == cc)
-               printf("setle  r%d\n", rd);
+               printf("setle    r%d\n", rd);
        else {
                scf_loge("inst: %#x\n", inst);
                return -EINVAL;
@@ -589,18 +584,18 @@ static int __naja_mov(scf_vm_t* vm, uint32_t inst)
        if (I) {
                if (0 == opt) {
                        if (X && (inst & 0x8000))
-                               printf("movsb  r%d, %d\n", rd, inst & 0xffff);
+                               printf("movsb    r%d, %d\n", rd, inst & 0xffff);
                        else
-                               printf("mov    r%d, %d\n", rd, inst & 0xffff);
+                               printf("mov      r%d, %d\n", rd, inst & 0xffff);
 
                } else if (1 == opt)
-                       printf("mov    r%d, %d << 16\n", rd, inst & 0xffff);
+                       printf("mov      r%d, %d << 16\n", rd, inst & 0xffff);
                else if (2 == opt)
-                       printf("mov    r%d, %d << 32\n", rd, inst & 0xffff);
+                       printf("mov      r%d, %d << 32\n", rd, inst & 0xffff);
                else if (3 == opt)
-                       printf("mov    r%d, %d << 48\n", rd, inst & 0xffff);
+                       printf("mov      r%d, %d << 48\n", rd, inst & 0xffff);
                else if (7 == opt)
-                       printf("mvn    r%d, %d\n", rd, inst & 0xffff);
+                       printf("mvn      r%d, %d\n", rd, inst & 0xffff);
 
        } else {
                int rs  =  inst & 0x1f;
@@ -609,61 +604,326 @@ static int __naja_mov(scf_vm_t* vm, uint32_t inst)
 
                if (0 == opt) {
                        if (X)
-                               printf("mov    r%d, r%d LSL r%d\n", rd, rs, rs1);
+                               printf("mov      r%d, r%d LSL r%d\n", rd, rs, rs1);
                        else {
                                if (0 == u11)
-                                       printf("mov    r%d, r%d\n", rd, rs);
+                                       printf("mov      r%d, r%d\n", rd, rs);
                                else
-                                       printf("mov    r%d, r%d LSL %d\n", rd, rs, u11);
+                                       printf("mov      r%d, r%d LSL %d\n", rd, rs, u11);
                        }
                } else if (1 == opt) {
                        if (X)
-                               printf("mov    r%d, r%d LSR r%d\n", rd, rs, rs1);
+                               printf("mov      r%d, r%d LSR r%d\n", rd, rs, rs1);
                        else {
                                if (0 == u11)
-                                       printf("mov    r%d, r%d\n", rd, rs);
+                                       printf("mov      r%d, r%d\n", rd, rs);
                                else
-                                       printf("mov    r%d, r%d LSR %d\n", rd, rs, u11);
+                                       printf("mov      r%d, r%d LSR %d\n", rd, rs, u11);
                        }
 
                } else if (2 == opt) {
                        if (X)
-                               printf("mov    r%d, r%d ASR r%d\n", rd, rs, rs1);
+                               printf("mov      r%d, r%d ASR r%d\n", rd, rs, rs1);
                        else {
                                if (0 == u11)
-                                       printf("mov    r%d, r%d\n", rd, rs);
+                                       printf("mov      r%d, r%d\n", rd, rs);
                                else
-                                       printf("mov    r%d, r%d ASR %d\n", rd, rs, u11);
+                                       printf("mov      r%d, r%d ASR %d\n", rd, rs, u11);
                        }
 
                } else if (3 == opt)
-                       printf("NOT    r%d, r%d\n", rd, rs);
+                       printf("NOT      r%d, r%d\n", rd, rs);
                else if (4 == opt)
-                       printf("NEG    r%d, r%d\n", rd, rs);
+                       printf("NEG      r%d, r%d\n", rd, rs);
 
                else if (5 == opt) {
                        if (X)
-                               printf("movsb  r%d, r%d\n", rd, rs);
+                               printf("movsb    r%d, r%d\n", rd, rs);
                        else
-                               printf("movzb  r%d, r%d\n", rd, rs);
+                               printf("movzb    r%d, r%d\n", rd, rs);
 
                } else if (6 == opt) {
                        if (X)
-                               printf("movsw  r%d, r%d\n", rd, rs);
+                               printf("movsw    r%d, r%d\n", rd, rs);
                        else
-                               printf("movzw  r%d, r%d\n", rd, rs);
+                               printf("movzw    r%d, r%d\n", rd, rs);
 
                } else if (7 == opt) {
                        if (X)
-                               printf("movsl  r%d, r%d\n", rd, rs);
+                               printf("movsl    r%d, r%d\n", rd, rs);
                        else
-                               printf("movzl  r%d, r%d\n", rd, rs);
+                               printf("movzl    r%d, r%d\n", rd, rs);
                }
        }
 
        return 0;
 }
 
+static int __naja_fadd(scf_vm_t* vm, uint32_t inst)  // disassembly printer for fadd: prints "fadd rd, rs0, rs1"
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rs0 =  inst        & 0x1f;  // bits 0-4
+       int rs1 = (inst >>  5) & 0x1f;  // bits 5-9
+       int rd  = (inst >> 21) & 0x1f;  // bits 21-25
+
+       printf("fadd   d%d, d%d, d%d\n", rd, rs0, rs1);  // 'd' prefix, as fmul/fdiv/fcmp print their operands
+
+       naja->ip += 4;  // advance ip by one 32-bit instruction word
+       return 0;
+}
+
+static int __naja_fsub(scf_vm_t* vm, uint32_t inst)  // disassembly printer for fsub: prints "fsub rd, rs0, rs1"
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rs0 =  inst        & 0x1f;  // bits 0-4
+       int rs1 = (inst >>  5) & 0x1f;  // bits 5-9
+       int rd  = (inst >> 21) & 0x1f;  // bits 21-25
+
+       printf("fsub   d%d, d%d, d%d\n", rd, rs0, rs1);  // 'd' prefix, as fmul/fdiv/fcmp print their operands
+
+       naja->ip += 4;  // advance ip by one 32-bit instruction word
+       return 0;
+}
+
+static int __naja_fcmp(scf_vm_t* vm, uint32_t inst)  // disassembly printer for fcmp: prints both operand registers
+{
+       scf_vm_naja_t* state = vm->priv;
+
+       const int lhs = (int)( inst       & 0x1f);  // bits 0-4
+       const int rhs = (int)((inst >> 5) & 0x1f);  // bits 5-9
+
+       printf("fcmp   d%d, d%d\n", lhs, rhs);
+
+       state->ip += 4;  // advance ip by one 32-bit instruction word
+       return 0;
+}
+
+static int __naja_fmul(scf_vm_t* vm, uint32_t inst)  // disassembly printer for the fmadd / fmsub / fmul family
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rs0 =  inst        & 0x1f;  // bits 0-4
+       int rs1 = (inst >>  5) & 0x1f;  // bits 5-9
+       int rs2 = (inst >> 10) & 0x1f;  // bits 10-14, only printed for fmadd/fmsub
+       int rd  = (inst >> 21) & 0x1f;  // bits 21-25
+       int opt = (inst >> 18) & 0x3;   // bits 18-19 select which mnemonic is printed
+
+       if (0 == opt)
+               printf("fmadd   d%d, d%d, d%d, d%d\n", rd, rs2, rs0, rs1);
+       else if (1 == opt)
+               printf("fmsub   d%d, d%d, d%d, d%d\n", rd, rs2, rs0, rs1);
+       else  // opt 2 and 3 both print as plain fmul
+               printf("fmul    d%d, d%d, d%d\n", rd, rs0, rs1);
+
+       naja->ip += 4;  // advance ip by one 32-bit instruction word
+       return 0;
+}
+
+static int __naja_fdiv(scf_vm_t* vm, uint32_t inst)  // disassembly printer for the fdadd / fdsub / fdiv family
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rs0 =  inst        & 0x1f;  // bits 0-4
+       int rs1 = (inst >>  5) & 0x1f;  // bits 5-9
+       int rs2 = (inst >> 10) & 0x1f;  // bits 10-14, only printed for fdadd/fdsub
+       int rd  = (inst >> 21) & 0x1f;  // bits 21-25
+       int opt = (inst >> 18) & 0x3;   // bits 18-19 select which mnemonic is printed
+
+       if (0 == opt)
+               printf("fdadd   d%d, d%d, d%d, d%d\n", rd, rs2, rs0, rs1);
+       else if (1 == opt)
+               printf("fdsub   d%d, d%d, d%d, d%d\n", rd, rs2, rs0, rs1);
+       else  // opt 2 and 3 both print as plain fdiv
+               printf("fdiv    d%d, d%d, d%d\n", rd, rs0, rs1);
+
+       naja->ip += 4;  // advance ip by one 32-bit instruction word
+       return 0;
+}
+
+static int __naja_fstr_disp(scf_vm_t* vm, uint32_t inst)  // disassembly printer for fstr / fstrf with a register + displacement address
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rb  =  inst        & 0x1f;   // bits 0-4: register printed inside the brackets
+       int rd  = (inst >> 21) & 0x1f;   // bits 21-25: d/f register printed first
+       int A   = (inst >> 20) & 0x1;    // bit 20: printed as a trailing '!'
+       int ext = (inst >> 17) & 0x7;    // bits 17-19: 3 prints fstr (d reg), 6 prints fstrf (f reg)
+       int s12 = (inst >>  5) & 0xfff;  // bits 5-16: signed 12-bit displacement
+
+       if (s12  & 0x800)
+               s12 -= 0x1000;  // sign-extend without an out-of-range unsigned -> int conversion
+
+       switch (ext) {
+               case 3:  // displacement is printed scaled by 8; '*' because '<<' on a negative int is undefined
+                       if (A)
+                               printf("fstr     d%d, [r%d, %d]!\n", rd, rb, s12 * 8);
+                       else
+                               printf("fstr     d%d, [r%d, %d]\n", rd, rb, s12 * 8);
+                       break;
+
+               case 6:  // displacement is printed scaled by 4
+                       if (A)
+                               printf("fstrf    f%d, [r%d, %d]!\n", rd, rb, s12 * 4);
+                       else
+                               printf("fstrf    f%d, [r%d, %d]\n", rd, rb, s12 * 4);
+                       break;
+               default:
+                       scf_loge("ext: %d\n", ext);
+                       return -1;
+                       break;
+       };
+
+       naja->ip += 4;  // advance ip by one 32-bit instruction word
+       return 0;
+}
+
+static int __naja_fldr_disp(scf_vm_t* vm, uint32_t inst)  // disassembly printer for fldr / fldrf with a register + displacement address
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rb  =  inst        & 0x1f;   // bits 0-4: register printed inside the brackets
+       int rd  = (inst >> 21) & 0x1f;   // bits 21-25: d/f register printed first
+       int A   = (inst >> 20) & 0x1;    // bit 20: printed as a trailing '!'
+       int ext = (inst >> 17) & 0x7;    // bits 17-19: 3 prints fldr (d reg), 6 prints fldrf (f reg)
+       int s12 = (inst >>  5) & 0xfff;  // bits 5-16: signed 12-bit displacement
+
+       if (s12  & 0x800)
+               s12 -= 0x1000;  // sign-extend without an out-of-range unsigned -> int conversion
+
+       switch (ext) {
+               case 3:  // displacement is printed scaled by 8; '*' because '<<' on a negative int is undefined
+                       if (A)
+                               printf("fldr     d%d, [r%d, %d]!\n", rd, rb, s12 * 8);
+                       else
+                               printf("fldr     d%d, [r%d, %d]\n", rd, rb, s12 * 8);
+                       break;
+
+               case 6:  // displacement is printed scaled by 4
+                       if (A)
+                               printf("fldrf    f%d, [r%d, %d]!\n", rd, rb, s12 * 4);
+                       else
+                               printf("fldrf    f%d, [r%d, %d]\n", rd, rb, s12 * 4);
+                       break;
+               default:
+                       scf_loge("ext: %d\n", ext);
+                       return -1;
+                       break;
+       };
+
+       naja->ip += 4;  // advance ip by one 32-bit instruction word
+       return 0;
+}
+
+static int __naja_fldr_sib(scf_vm_t* vm, uint32_t inst)  // disassembly printer for fldr / fldrf with a two-register + immediate address
+{
+       scf_vm_naja_t* state = vm->priv;
+
+       const int base_reg  =  inst        & 0x1f;  // bits 0-4
+       const int index_reg = (inst >>  5) & 0x1f;  // bits 5-9
+       const int imm7      = (inst >> 10) & 0x7f;  // bits 10-16, printed as-is
+       const int kind      = (inst >> 17) & 0x7;   // bits 17-19
+       const int dst_reg   = (inst >> 21) & 0x1f;  // bits 21-25
+
+       if (3 == kind) {
+               // d-register form
+               printf("fldr  d%d, [r%d, r%d, %d]\n", dst_reg, base_reg, index_reg, imm7);
+
+       } else if (6 == kind) {
+               // f-register form
+               printf("fldrf f%d, [r%d, r%d, %d]\n", dst_reg, base_reg, index_reg, imm7);
+       } else {
+               scf_loge("\n");
+               return -1;
+       }
+
+       // only reached when one of the two known kinds was printed
+       state->ip += 4;
+       return 0;
+}
+
+static int __naja_fstr_sib(scf_vm_t* vm, uint32_t inst)  // disassembly printer for fstr / fstrf with a two-register + immediate address
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rb  =  inst        & 0x1f;  // bits 0-4
+       int ri  = (inst >>  5) & 0x1f;  // bits 5-9
+       int rd  = (inst >> 21) & 0x1f;  // bits 21-25
+       int ext = (inst >> 17) & 0x7;   // bits 17-19; a 2-bit mask here would make case 6 unreachable
+       int u7  = (inst >> 10) & 0x7f;  // bits 10-16, printed as-is
+
+       switch (ext) {
+               case 3:
+                       printf("fstr  d%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       break;
+
+               case 6:  // f-register store, so the mnemonic is fstrf (not the load's fldrf)
+                       printf("fstrf f%d, [r%d, r%d, %d]\n", rd, rb, ri, u7);
+                       break;
+               default:
+                       scf_loge("\n");
+                       return -1;
+                       break;
+       };
+
+       naja->ip += 4;  // advance ip by one 32-bit instruction word
+       return 0;
+}
+
+static int __naja_fmov(scf_vm_t* vm, uint32_t inst)  // disassembly printer for the fmov / fneg / convert family
+{
+       scf_vm_naja_t* naja = vm->priv;
+
+       int rs  =  inst & 0x1f;          // bits 0-4: source register number
+       int rd  = (inst >> 21) & 0x1f;   // bits 21-25: destination register number
+       int opt = (inst >> 16) & 0xf;    // bits 16-19 select the mnemonic; 8-0xb are rejected below
+
+       if (0 == opt)
+               printf("fmov     d%d, d%d\n", rd, rs);
+
+       else if (1 == opt)
+               printf("fss2sd   d%d, f%d\n", rd, rs);
+
+       else if (2 == opt)  // double -> single: destination is the f register, source the d register
+               printf("fsd2ss   f%d, d%d\n", rd, rs);
+
+       else if (3 == opt)
+               printf("fneg     d%d, d%d\n", rd, rs);
+
+       else if (4 == opt)
+               printf("cvtss2si r%d, f%d\n", rd, rs);
+
+       else if (5 == opt)
+               printf("cvtsd2si r%d, d%d\n", rd, rs);
+
+       else if (6 == opt)
+               printf("cvtss2ui r%d, f%d\n", rd, rs);
+
+       else if (7 == opt)
+               printf("cvtsd2ui r%d, d%d\n", rd, rs);
+
+       else if (0xc == opt)
+               printf("cvtsi2ss f%d, r%d\n", rd, rs);
+
+       else if (0xd == opt)
+               printf("cvtsi2sd d%d, r%d\n", rd, rs);
+
+       else if (0xe == opt)
+               printf("cvtui2ss f%d, r%d\n", rd, rs);
+
+       else if (0xf == opt)  // destination is a double, so 'd' like cvtsi2sd above
+               printf("cvtui2sd d%d, r%d\n", rd, rs);
+       else {
+               scf_loge("\n");
+               return -EINVAL;
+       }
+
+       naja->ip += 4;  // advance ip by one 32-bit instruction word
+       return 0;
+}
+
+
 static naja_opcode_pt  naja_opcodes[64] =
 {
        __naja_add,      // 0
@@ -683,22 +943,22 @@ static naja_opcode_pt  naja_opcodes[64] =
        __naja_teq,      //14
        __naja_mov,      //15
 
-       NULL,            //16
-       NULL,            //17
-       NULL,            //18
-       NULL,            //19
-       NULL,            //20
-       NULL,            //21
+       __naja_fadd,     //16
+       __naja_fsub,     //17
+       __naja_fmul,     //18
+       __naja_fdiv,     //19
+       __naja_fldr_disp,//20
+       __naja_fstr_disp,//21
        NULL,            //22
        NULL,            //23
        __naja_call_disp,//24
-       NULL,            //25
+       __naja_fcmp,     //25
        __naja_call_reg, //26
        NULL,            //27
-       NULL,            //28
-       NULL,            //29
+       __naja_fldr_sib, //28
+       __naja_fstr_sib, //29
        NULL,            //30
-       NULL,            //31
+       __naja_fmov,     //31
 
        NULL,            //32
        NULL,            //33
index d9c02b0173c64c22e00e6c45d0cbdd8f5c59096d..54eea6fad6ebdc9e8bd1705d09fa3fec8b8ba7fb 100644 (file)
@@ -17,6 +17,6 @@ int main()
        }
 
        printf("main ok\n");
-       return 0;
+       return ret;
 }