ravi9 · I-N-T-E-L · Jan 21, 2026 · Jan 22, 2026 · Jan 22, 2026 · Jan 24, 2026
diff --git a/docs/build.md b/docs/build.md
@@ -775,7 +775,7 @@ git switch dev_backend_openvino
 
 - **Windows:**
     ```bash
-    "C:\Program Files (x86)\Intel\openvino_2025.3.0\setupvars.bat"
+    "<OPENVINO_INSTALLATION_DIR>\setupvars.bat"
     cmake -B build\ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON -DGGML_CPU_REPACK=OFF -DLLAMA_CURL=OFF -DCMAKE_TOOLCHAIN_FILE=C:\vcpkg\scripts\buildsystems\vcpkg.cmake
     cmake --build build\ReleaseOV --parallel
     ```

diff --git a/ggml/src/ggml-openvino/ggml-openvino.cpp b/ggml/src/ggml-openvino/ggml-openvino.cpp
@@ -140,7 +140,7 @@ static enum ggml_status ggml_backend_openvino_buffer_init_tensor(ggml_backend_bu
 
     // Put kvcache on device memory for GPU (NPU memory is too small even for kvcache)
     if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY && strncmp(tensor->name, "cache_", 6) == 0 && !ctx->is_remote &&
-        ggml_openvino_get_device_name() == "GPU") {
+        ggml_openvino_get_device_name() == "GPU" && !getenv("GGML_OPENVINO_STATEFUL_EXECUTION")) {
         GGML_ASSERT(ctx->tensor_extras.empty());
         auto device = ctx->device;
         auto size = ctx->size;
@@ -943,7 +943,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
 }
 
 static bool ggml_backend_openvino_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
-    return ggml_backend_buft_is_openvino(buft) || ggml_backend_buft_is_openvino_host(buft);
+    return ggml_backend_buft_is_openvino(buft) || ggml_backend_buft_is_host(buft);  // true for OpenVINO buffer types and for any buffer type ggml_backend_buft_is_host() accepts; dev is not consulted
     GGML_UNUSED(dev);
 }
 

diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
@@ -768,14 +768,12 @@ graph_key compute_graph_key(ggml_cgraph * cgraph) {
     graph_key key;
     key.n_nodes = cgraph->n_nodes;
 
-    if (cgraph->n_nodes > 0) {
-        key.first_node_name = std::string(cgraph->nodes[0]->name);
-        key.last_node_name = std::string(cgraph->nodes[cgraph->n_nodes - 1]->name);
-    } else {
-        key.first_node_name = "";
-        key.last_node_name = "";
+    for (int i = 0; i < cgraph->n_nodes; ++i) {  // scans every node with no early exit, so the last matching node wins
+        const auto * node = cgraph->nodes[i];
+        if (node->op == GGML_OP_SET_ROWS && strncmp(node->src[2]->name, "cache_k_l0", 10) == 0) {  // 10-char prefix match on src[2]'s name; assumes SET_ROWS nodes always have a non-null src[2] - TODO confirm
+            key.cache_k_l0 = node->src[2];  // the key stores the tensor's address, not its name
+        }  // NOTE(review): when no node matches, this function never assigns key.cache_k_l0
     }
-
     return key;
 }
 

diff --git a/ggml/src/ggml-openvino/utils.h b/ggml/src/ggml-openvino/utils.h
@@ -8,20 +8,17 @@
 
 struct graph_key {
     size_t n_nodes;
-    std::string first_node_name;
-    std::string last_node_name;
+    void * cache_k_l0 = nullptr;  // address of the tensor compute_graph_key() picks up from a SET_ROWS node's src[2]; defaults to null so a graph without such a node still yields a well-defined key
 
     bool operator==(const graph_key & other) const {
-        return n_nodes == other.n_nodes && first_node_name == other.first_node_name &&
-               last_node_name == other.last_node_name;
+        return n_nodes == other.n_nodes && cache_k_l0 == other.cache_k_l0;  // keys are equal when node count and cache tensor address both match
     }
 };
 
 struct graph_key_hash {
     size_t operator()(const graph_key & key) const {
         size_t h = std::hash<size_t>{}(key.n_nodes);
-        h ^= std::hash<std::string>{}(key.first_node_name) + 0x9e3779b9 + (h << 6) + (h >> 2);
-        h ^= std::hash<std::string>{}(key.last_node_name) + 0x9e3779b9 + (h << 6) + (h >> 2);
+        h ^= std::hash<void *>{}(key.cache_k_l0) + 0x9e3779b9 + (h << 6) + (h >> 2);  // folds the pointer's hash into h, so the hash covers the same two fields graph_key::operator== compares
         return h;
     }
 };