OpenVINO backend: stateful-execution guard, host buffer support, graph cache key.

Summary of the changes carried by this patch:

  docs/build.md: replaces the hard-coded OpenVINO 2025.3.0 install path in the
  Windows setupvars.bat invocation.
  NOTE(review): the new value reads "\setupvars.bat"; this looks like an
  install-directory placeholder whose angle-bracket text was lost. Confirm the
  intended text before merging.

  ggml-openvino.cpp (buffer_init_tensor): the branch commented "Put kvcache on
  device memory for GPU" is now additionally skipped when the environment
  variable GGML_OPENVINO_STATEFUL_EXECUTION is set.

  ggml-openvino.cpp (device_supports_buft): the second check now calls
  ggml_backend_buft_is_host() instead of ggml_backend_buft_is_openvino_host().

  utils.cpp / utils.h: graph_key no longer stores the first/last node names.
  It stores n_nodes plus the src[2] pointer of the GGML_OP_SET_ROWS node whose
  src[2] name starts with "cache_k_l0".

Review fixes folded into this revision of the patch:

  graph_key::cache_k_l0 now has a default member initializer (nullptr).
  compute_graph_key() only assigns it when a matching SET_ROWS node is found,
  so without the initializer operator== and graph_key_hash would read an
  indeterminate pointer for graphs that have no such node.

  std::hash is spelled with its explicit template argument (size_t, void *,
  std::string) to match the member types declared in graph_key.

NOTE(review): indentation and column alignment of context/removed lines were
reconstructed; if they differ from the tree, apply with
`git apply --ignore-whitespace`.

diff --git a/docs/build.md b/docs/build.md
index be29e1107fa..7925e2697f6 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -775,7 +775,7 @@ git switch dev_backend_openvino
 
 - **Windows:**
     ```bash
-    "C:\Program Files (x86)\Intel\openvino_2025.3.0\setupvars.bat"
+    "\setupvars.bat"
     cmake -B build\ReleaseOV -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_OPENVINO=ON -DGGML_CPU_REPACK=OFF -DLLAMA_CURL=OFF -DCMAKE_TOOLCHAIN_FILE=C:\vcpkg\scripts\buildsystems\vcpkg.cmake
     cmake --build build\ReleaseOV --parallel
     ```
diff --git a/ggml/src/ggml-openvino/ggml-openvino.cpp b/ggml/src/ggml-openvino/ggml-openvino.cpp
index de986ea42d6..8d6a0dbf335 100644
--- a/ggml/src/ggml-openvino/ggml-openvino.cpp
+++ b/ggml/src/ggml-openvino/ggml-openvino.cpp
@@ -140,7 +140,7 @@ static enum ggml_status ggml_backend_openvino_buffer_init_tensor(ggml_backend_bu
 
     // Put kvcache on device memory for GPU (NPU memory is too small even for kvcache)
     if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_ANY && strncmp(tensor->name, "cache_", 6) == 0 && !ctx->is_remote &&
-        ggml_openvino_get_device_name() == "GPU") {
+        ggml_openvino_get_device_name() == "GPU" && !getenv("GGML_OPENVINO_STATEFUL_EXECUTION")) {
         GGML_ASSERT(ctx->tensor_extras.empty());
         auto device = ctx->device;
         auto size = ctx->size;
@@ -943,7 +943,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
 }
 
 static bool ggml_backend_openvino_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
-    return ggml_backend_buft_is_openvino(buft) || ggml_backend_buft_is_openvino_host(buft);
+    return ggml_backend_buft_is_openvino(buft) || ggml_backend_buft_is_host(buft);
     GGML_UNUSED(dev);
 }
 
diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index f7d62588c87..2d30eef941f 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -768,14 +768,12 @@ graph_key compute_graph_key(ggml_cgraph * cgraph) {
     graph_key key;
     key.n_nodes = cgraph->n_nodes;
 
-    if (cgraph->n_nodes > 0) {
-        key.first_node_name = std::string(cgraph->nodes[0]->name);
-        key.last_node_name = std::string(cgraph->nodes[cgraph->n_nodes - 1]->name);
-    } else {
-        key.first_node_name = "";
-        key.last_node_name = "";
+    for (int i = 0; i < cgraph->n_nodes; ++i) {
+        const auto * node = cgraph->nodes[i];
+        if (node->op == GGML_OP_SET_ROWS && strncmp(node->src[2]->name, "cache_k_l0", 10) == 0) {
+            key.cache_k_l0 = node->src[2];
+        }
     }
-
     return key;
 }
 
diff --git a/ggml/src/ggml-openvino/utils.h b/ggml/src/ggml-openvino/utils.h
index 47bf2d4ff17..72ef904f741 100644
--- a/ggml/src/ggml-openvino/utils.h
+++ b/ggml/src/ggml-openvino/utils.h
@@ -8,20 +8,17 @@
 
 struct graph_key {
     size_t n_nodes;
-    std::string first_node_name;
-    std::string last_node_name;
+    void * cache_k_l0 = nullptr;
 
     bool operator==(const graph_key & other) const {
-        return n_nodes == other.n_nodes && first_node_name == other.first_node_name &&
-               last_node_name == other.last_node_name;
+        return n_nodes == other.n_nodes && cache_k_l0 == other.cache_k_l0;
     }
 };
 
 struct graph_key_hash {
     size_t operator()(const graph_key & key) const {
         size_t h = std::hash<size_t>{}(key.n_nodes);
-        h ^= std::hash<std::string>{}(key.first_node_name) + 0x9e3779b9 + (h << 6) + (h >> 2);
-        h ^= std::hash<std::string>{}(key.last_node_name) + 0x9e3779b9 + (h << 6) + (h >> 2);
+        h ^= std::hash<void *>{}(key.cache_k_l0) + 0x9e3779b9 + (h << 6) + (h >> 2);
         return h;
     }
 };