diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4abebf8c38b4a4a32890ca517ddfc2633568776e..1b8440aa923e172574eeda3c8603cdc21705ad10 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -2,11 +2,21 @@ image: archlinux/base
 
 stages:
     - build
+    - test
 
 build:
     stage: build
     script: 
+        - pacman -Syu --noconfirm bison flex gcc make gettext sdl2 lib32-glibc grep  # full -Syu: Arch does not support partial upgrades (-Sy followed by -S)
+        - export AM_HOME=$(pwd)/nexus-am/  # absolute path of nexus-am/ under the job's starting directory
+        - cd nemu && make
+
+
+test:
+    stage: test
+    script:
-        - pacman -Sy && pacman -S --noconfirm bison flex gcc make gettext sdl2 lib32-glibc grep
+        - pacman -Syu --noconfirm bison flex gcc make gettext sdl2 lib32-glibc grep  # full -Syu: Arch does not support partial upgrades (-Sy followed by -S)
         - export AM_HOME=$(pwd)/nexus-am/
         - cd nemu && make && show_log=1 ./runall.sh
 
+
diff --git a/nemu/src/cpu/exec/data-mov.cc b/nemu/src/cpu/exec/data-mov.cc
index c890af268f76ccc11e7d4f20f881696df417899e..39345d1e41ce80e5d4616fc7cb2899fd839d9fcc 100644
--- a/nemu/src/cpu/exec/data-mov.cc
+++ b/nemu/src/cpu/exec/data-mov.cc
@@ -107,8 +107,7 @@ namespace EHelperImpl {
     // movsb, movsw, movsd
 
     // address size is always 32bit.
-    const auto source_index = cpu.esi,
-      dest_index = cpu.edi;
+    const auto source_index = cpu.esi, dest_index = cpu.edi; // source (ESI) and destination (EDI) addresses, captured before the registers are advanced
     
     auto copy_bytes = 4;
     if(decoding.opcode == 0xa4) {
@@ -123,8 +122,15 @@ namespace EHelperImpl {
     else {
       // movsd
     }
-
-    vaddr_write(dest_index, vaddr_read(source_index, 4), copy_bytes);
+    // Transfer exactly one element: request only copy_bytes bytes from the source
+    // so that a byte or word move does not read past its own operand.
+    vaddr_write(dest_index, vaddr_read(source_index, copy_bytes), copy_bytes);
+
+    // ESI and EDI then move by the element size: forwards when the direction
+    // flag (DF) is clear, backwards when it is set.
+    const auto index_inc = copy_bytes * ( cpu_eflags::get<cpu_eflags::DF>() ? (-1) : 1 );
+    cpu.esi += index_inc;
+    cpu.edi += index_inc;
     
     print_asm_template2(movs);
   }
diff --git a/nemu/src/cpu/reg.cc b/nemu/src/cpu/reg.cc
index 2eba473a5cdbe7b231f0631e249ad9981cfe1f4a..76e0f51919417e12e5e9e0ff89a7e2ccdf2d2738 100644
--- a/nemu/src/cpu/reg.cc
+++ b/nemu/src/cpu/reg.cc
@@ -8,15 +8,27 @@ const char *regsl[] = {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"};
 const char *regsw[] = {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"};
 const char *regsb[] = {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"};
 
-inline void rcpu_bootstrap_check_1() {
-  assert(sizeof(rtlreg_t) == sizeof(uint32_t));
-  assert(sizeof(vaddr_t) == sizeof(uint32_t));
-  printf("Recolic bootstrap test %s passed.\n", __FILE__);
-}
+// Bootstrap hook: the constructor runs when the single global instance defined
+// right after this struct is constructed.
+struct rcpu_bootstrap_check {
+  // Compile-time layout checks: both types must have the size of uint32_t.
+  static_assert(sizeof(uint32_t) == sizeof(rtlreg_t));
+  static_assert(sizeof(uint32_t) == sizeof(vaddr_t));
+
+  rcpu_bootstrap_check() {
+    // Calling conventions treat the direction flag as cleared by default, and
+    // some compilers and libraries (Microsoft, Watcom, Digital Mars) rely on it
+    // always being clear, so DF is cleared explicitly here.
+    // Reference: https://www.agner.org/optimize/calling_conventions.pdf
+    // (section "Direction flag").
+    cpu_eflags::get<cpu_eflags::DF>() = false;
+    printf("R-CPU bootstrap %s finished.\n", __FILE__);
+  }
+};
+
+rcpu_bootstrap_check rcpu_bootstrap_check_instance;
 
 void reg_test() {
-  rcpu_bootstrap_check_1();
-
   srand(time(0));
   uint32_t sample[8];
   uint32_t eip_sample = rand();