diff --git a/3rdparty/llama.cpp b/3rdparty/llama.cpp
index 1f86f058d..0e7e4f387 160000
--- a/3rdparty/llama.cpp
+++ b/3rdparty/llama.cpp
@@ -1 +1 @@
-Subproject commit 1f86f058de0c3f4098dedae2ae8653c335c868a1
+Subproject commit 0e7e4f387c4a49e15b06a26b98d353cb3402c435
diff --git a/setup_env.py b/setup_env.py
index 3bf5fb8f7..4ef7683b8 100644
--- a/setup_env.py
+++ b/setup_env.py
@@ -104,7 +104,7 @@ def run_command(command, shell=False, log_step=None):
             subprocess.run(command, shell=shell, check=True)
         except subprocess.CalledProcessError as e:
             logging.error(f"Error occurred while running command: {e}")
-        sys.exit(1)
+            sys.exit(1)
 
 def prepare_model():
     _, arch = system_info()
diff --git a/src/ggml-bitnet-mad.cpp b/src/ggml-bitnet-mad.cpp
index 4ba9d6509..ad18bac04 100644
--- a/src/ggml-bitnet-mad.cpp
+++ b/src/ggml-bitnet-mad.cpp
@@ -808,7 +808,7 @@ void ggml_vec_dot_i2_i8_s_Nx1(int n, float * s, size_t bs, const void * vx, size
             accu[iy] = _mm256_setzero_si256();
         }
 
-        int8_t * y_col = y + col * by;
+        const int8_t * y_col = y + col * by;
         
         for (int i = 0; i < group32_num; i++) {
             const uint8_t *px = x + i * 1024;
diff --git a/utils/e2e_benchmark.py b/utils/e2e_benchmark.py
index 07f93ed72..464780bd5 100644
--- a/utils/e2e_benchmark.py
+++ b/utils/e2e_benchmark.py
@@ -20,7 +20,7 @@ def run_command(command, shell=False, log_step=None):
             subprocess.run(command, shell=shell, check=True)
         except subprocess.CalledProcessError as e:
             logging.error(f"Error occurred while running command: {e}")
-        sys.exit(1)
+            sys.exit(1)
 
 def run_benchmark():
     build_dir =  os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "build")